Beispiel #1
0
static void cat_cleanup(void *data)
{
    cat_info *pci = (cat_info *) data;
    Rconnection con = pci->con;
    Rboolean wasopen = pci->wasopen;
    int changedcon = pci->changedcon;

    con->fflush(con);
    if(changedcon) switch_stdout(-1, 0);
    /* previous line might have closed it */
    if(!wasopen && con->isopen) con->close(con);
#ifdef Win32
    WinUTF8out = FALSE;
#endif
}
Beispiel #2
0
SEXP attribute_hidden do_readDCF(SEXP call, SEXP op, SEXP args, SEXP env)
{
    int nwhat, nret, nc, nr, m, k, lastm, need;
    Rboolean blank_skip, field_skip = FALSE;
    int whatlen, dynwhat, buflen = 8096; // was 100, but that re-alloced often
    char *line, *buf;
    regex_t blankline, contline, trailblank, regline, eblankline;
    regmatch_t regmatch[1];
    SEXP file, what, what2, retval, retval2, dims, dimnames;
    Rconnection con = NULL;
    Rboolean wasopen, is_eblankline;
    RCNTXT cntxt;

    SEXP fold_excludes;
    Rboolean field_fold = TRUE, has_fold_excludes;
    const char *field_name;
    int offset = 0; /* -Wall */

    checkArity(op, args);

    file = CAR(args);
    con = getConnection(asInteger(file));
    wasopen = con->isopen;
    if(!wasopen) {
	if(!con->open(con)) error(_("cannot open the connection"));
	/* Set up a context which will close the connection on error */
	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
		     R_NilValue, R_NilValue);
	cntxt.cend = &con_cleanup;
	cntxt.cenddata = con;
    }
    if(!con->canread) error(_("cannot read from this connection"));

    args = CDR(args);
    PROTECT(what = coerceVector(CAR(args), STRSXP)); /* argument fields */
    nwhat = LENGTH(what);
    dynwhat = (nwhat == 0);

    args = CDR(args);
    PROTECT(fold_excludes = coerceVector(CAR(args), STRSXP));
    has_fold_excludes = (LENGTH(fold_excludes) > 0);

    buf = (char *) malloc(buflen);
    if(!buf) error(_("could not allocate memory for 'read.dcf'"));
    nret = 20;
    /* it is easier if we first have a record per column */
    PROTECT(retval = allocMatrixNA(STRSXP, LENGTH(what), nret));

    /* These used to use [:blank:] but that can match \xa0 as part of
       a UTF-8 character (and is nbspace on Windows). */ 
    tre_regcomp(&blankline, "^[[:blank:]]*$", REG_NOSUB & REG_EXTENDED);
    tre_regcomp(&trailblank, "[ \t]+$", REG_EXTENDED);
    tre_regcomp(&contline, "^[[:blank:]]+", REG_EXTENDED);
    tre_regcomp(&regline, "^[^:]+:[[:blank:]]*", REG_EXTENDED);
    tre_regcomp(&eblankline, "^[[:space:]]+\\.[[:space:]]*$", REG_EXTENDED);

    k = 0;
    lastm = -1; /* index of the field currently being recorded */
    blank_skip = TRUE;
    void *vmax = vmaxget();
    while((line = Rconn_getline2(con))) {
	if(strlen(line) == 0 ||
	   tre_regexecb(&blankline, line, 0, 0, 0) == 0) {
	    /* A blank line.  The first one after a record ends a new
	     * record, subsequent ones are skipped */
	    if(!blank_skip) {
		k++;
		if(k > nret - 1){
		    nret *= 2;
		    PROTECT(retval2 = allocMatrixNA(STRSXP, LENGTH(what), nret));
		    transferVector(retval2, retval);
		    UNPROTECT_PTR(retval);
		    retval = retval2;
		}
		blank_skip = TRUE;
		lastm = -1;
		field_skip = FALSE;
		field_fold = TRUE;
	    }
	} else {
	    blank_skip = FALSE;
	    if(tre_regexecb(&contline, line, 1, regmatch, 0) == 0) {
		/* A continuation line: wrong if at the beginning of a
		   record. */
		if((lastm == -1) && !field_skip) {
		    line[20] = '\0';
		    error(_("Found continuation line starting '%s ...' at begin of record."),
			  line);
		}
		if(lastm >= 0) {
		    need = (int) strlen(CHAR(STRING_ELT(retval,
							lastm + nwhat * k))) + 2;
		    if(tre_regexecb(&eblankline, line, 0, NULL, 0) == 0) {
			is_eblankline = TRUE;
		    } else {
			is_eblankline = FALSE;
			if(field_fold) {
			    offset = regmatch[0].rm_eo;
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			} else {
			    offset = 0;
			}
			need += (int) strlen(line + offset);
		    }
		    if(buflen < need) {
			char *tmp = (char *) realloc(buf, need);
			if(!tmp) {
			    free(buf);
			    error(_("could not allocate memory for 'read.dcf'"));
			} else buf = tmp;
			buflen = need;
		    }
		    strcpy(buf,CHAR(STRING_ELT(retval, lastm + nwhat * k)));
		    strcat(buf, "\n");
		    if(!is_eblankline) strcat(buf, line + offset);
		    SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(buf));
		}
	    } else {
		if(tre_regexecb(&regline, line, 1, regmatch, 0) == 0) {
		    for(m = 0; m < nwhat; m++){
			whatlen = (int) strlen(CHAR(STRING_ELT(what, m)));
			if(strlen(line) > whatlen &&
			   line[whatlen] == ':' &&
			   strncmp(CHAR(STRING_ELT(what, m)),
				   line, whatlen) == 0) {
			    /* An already known field we are recording. */
			    lastm = m;
			    field_skip = FALSE;
			    field_name = CHAR(STRING_ELT(what, lastm));
			    if(has_fold_excludes) {
				field_fold =
				    field_is_foldable_p(field_name,
							fold_excludes);
			    }
			    if(field_fold) {
				offset = regmatch[0].rm_eo;
				/* Also remove trailing whitespace. */
				if((tre_regexecb(&trailblank, line, 1,
						 regmatch, 0) == 0))
				    line[regmatch[0].rm_so] = '\0';
			    } else {
				offset = 0;
			    }
			    SET_STRING_ELT(retval, m + nwhat * k,
					   mkChar(line + offset));
			    break;
			} else {
			    /* This is a field, but not one prespecified */
			    lastm = -1;
			    field_skip = TRUE;
			}
		    }
		    if(dynwhat && (lastm == -1)) {
			/* A previously unseen field and we are
			 * recording all fields */
			field_skip = FALSE;
			PROTECT(what2 = allocVector(STRSXP, nwhat+1));
			PROTECT(retval2 = allocMatrixNA(STRSXP,
							nrows(retval)+1,
							ncols(retval)));
			if(nwhat > 0) {
			    copyVector(what2, what);
			    for(nr = 0; nr < nrows(retval); nr++){
				for(nc = 0; nc < ncols(retval); nc++){
				    SET_STRING_ELT(retval2, nr+nc*nrows(retval2),
						   STRING_ELT(retval,
							      nr+nc*nrows(retval)));
				}
			    }
			}
			UNPROTECT_PTR(retval);
			UNPROTECT_PTR(what);
			retval = retval2;
			what = what2;
			/* Make sure enough space was used */
			need = (int) (Rf_strchr(line, ':') - line + 1);
			if(buflen < need){
			    char *tmp = (char *) realloc(buf, need);
			    if(!tmp) {
				free(buf);
				error(_("could not allocate memory for 'read.dcf'"));
			    } else buf = tmp;
			    buflen = need;
			}
			strncpy(buf, line, Rf_strchr(line, ':') - line);
			buf[Rf_strchr(line, ':') - line] = '\0';
			SET_STRING_ELT(what, nwhat, mkChar(buf));
			nwhat++;
			/* lastm uses C indexing, hence nwhat - 1 */
			lastm = nwhat - 1;
			field_name = CHAR(STRING_ELT(what, lastm));
			if(has_fold_excludes) {
			    field_fold =
				field_is_foldable_p(field_name,
						    fold_excludes);
			}
			offset = regmatch[0].rm_eo;
			if(field_fold) {
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			}
			SET_STRING_ELT(retval, lastm + nwhat * k,
				       mkChar(line + offset));
		    }
		} else {
		    /* Must be a regular line with no tag ... */
		    line[20] = '\0';
		    error(_("Line starting '%s ...' is malformed!"), line);
		}
	    }
	}
    }
    vmaxset(vmax);
    if(!wasopen) {endcontext(&cntxt); con->close(con);}
    free(buf);
    tre_regfree(&blankline);
    tre_regfree(&contline);
    tre_regfree(&trailblank);
    tre_regfree(&regline);
    tre_regfree(&eblankline);

    if(!blank_skip) k++;

    /* and now transpose the whole matrix */
    PROTECT(retval2 = allocMatrixNA(STRSXP, k, LENGTH(what)));
    copyMatrix(retval2, retval, 1);

    PROTECT(dimnames = allocVector(VECSXP, 2));
    PROTECT(dims = allocVector(INTSXP, 2));
    INTEGER(dims)[0] = k;
    INTEGER(dims)[1] = LENGTH(what);
    SET_VECTOR_ELT(dimnames, 1, what);
    setAttrib(retval2, R_DimSymbol, dims);
    setAttrib(retval2, R_DimNamesSymbol, dimnames);
    UNPROTECT(6);
    return(retval2);
}
Beispiel #3
0
static void con_cleanup(void *data)
{
    Rconnection con = data;
    if(con->isopen) con->close(con);
}
Beispiel #4
0
/* "do_parse" - the user interface input/output to files.

 The internal R_Parse.. functions are defined in ./gram.y (-> gram.c)

 .Internal( parse(file, n, text, prompt, srcfile, encoding) )
 If there is text then that is read and the other arguments are ignored.
*/
SEXP attribute_hidden do_parse(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP text, prompt, s, source;
    Rconnection con;
    Rboolean wasopen, old_latin1 = known_to_be_latin1,
	old_utf8 = known_to_be_utf8, allKnown = TRUE;
    int ifile, num, i;
    const char *encoding;
    ParseStatus status;

    checkArity(op, args);
    R_ParseError = 0;
    R_ParseErrorMsg[0] = '\0';

    ifile = asInteger(CAR(args));                       args = CDR(args);

    con = getConnection(ifile);
    wasopen = con->isopen;
    num = asInteger(CAR(args));				args = CDR(args);
    if (num == 0)
	return(allocVector(EXPRSXP, 0));

    PROTECT(text = coerceVector(CAR(args), STRSXP));
    if(length(CAR(args)) && !length(text))
	errorcall(call, _("coercion of 'text' to character was unsuccessful"));
    args = CDR(args);
    prompt = CAR(args);					args = CDR(args);
    source = CAR(args);					args = CDR(args);
    if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1)
	error(_("invalid '%s' value"), "encoding");
    encoding = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */
    known_to_be_latin1 = known_to_be_utf8 = FALSE;
    /* allow 'encoding' to override declaration on 'text'. */
    if(streql(encoding, "latin1")) {
	known_to_be_latin1 = TRUE;
	allKnown = FALSE;
    } else if(streql(encoding, "UTF-8"))  {
	known_to_be_utf8 = TRUE;
	allKnown = FALSE;
    } else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc")) 
    	warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding);

    if (prompt == R_NilValue)
	PROTECT(prompt);
    else
	PROTECT(prompt = coerceVector(prompt, STRSXP));

    if (length(text) > 0) {
	/* If 'text' has known encoding then we can be sure it will be
	   correctly re-encoded to the current encoding by
	   translateChar in the parser and so could mark the result in
	   a Latin-1 or UTF-8 locale.

	   A small complication is that different elements could have
	   different encodings, but all that matters is that all
	   non-ASCII elements have known encoding.
	*/
	for(i = 0; i < length(text); i++)
	    if(!ENC_KNOWN(STRING_ELT(text, i)) &&
	       !IS_ASCII(STRING_ELT(text, i))) {
		allKnown = FALSE;
		break;
	    }
	if(allKnown) {
	    known_to_be_latin1 = old_latin1;
	    known_to_be_utf8 = old_utf8;
	}
	if (num == NA_INTEGER) num = -1;
	s = R_ParseVector(text, num, &status, source);
	if (status != PARSE_OK) parseError(call, R_ParseError);
    }
    else if (ifile >= 3) {/* file != "" */
	if (num == NA_INTEGER) num = -1;
	try {
	    if(!wasopen && !con->open(con))
		error(_("cannot open the connection"));
	    if(!con->canread) error(_("cannot read from this connection"));
	    s = R_ParseConn(con, num, &status, source);
	    if(!wasopen) con->close(con);
	} catch (...) {
	    if (!wasopen && con->isopen)
		con->close(con);
	    throw;
	}
	if (status != PARSE_OK) parseError(call, R_ParseError);
    }
    else {
	if (num == NA_INTEGER) num = 1;
	s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source);
	if (status != PARSE_OK) parseError(call, R_ParseError);
    }
    UNPROTECT(2);
    known_to_be_latin1 = old_latin1;
    known_to_be_utf8 = old_utf8;
    return s;
}