SEXP installTrChar(SEXP x) { void * obj; const char *inbuf, *ans = CHAR(x); char *outbuf; size_t inb, outb, res; cetype_t ienc = getCharCE(x); R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE}; if(TYPEOF(x) != CHARSXP) error(_("'%s' must be called on a CHARSXP"), "installTrChar"); if(x == NA_STRING || !(ENC_KNOWN(x))) return install(ans); if(IS_BYTES(x)) error(_("translating strings with \"bytes\" encoding is not allowed")); if(utf8locale && IS_UTF8(x)) return install(ans); if(latin1locale && IS_LATIN1(x)) return install(ans); if(IS_ASCII(x)) return install(ans); if(IS_LATIN1(x)) { if(!latin1_obj) { obj = Riconv_open("", "latin1"); /* should never happen */ if(obj == (void *)(-1)) #ifdef Win32 error(_("unsupported conversion from '%s' in codepage %d"), "latin1", localeCP); #else error(_("unsupported conversion from '%s' to '%s'"), "latin1", ""); #endif latin1_obj = obj; } obj = latin1_obj; } else { if(!utf8_obj) { obj = Riconv_open("", "UTF-8"); /* should never happen */ if(obj == (void *)(-1)) #ifdef Win32 error(_("unsupported conversion from '%s' in codepage %d"), "latin1", localeCP); #else error(_("unsupported conversion from '%s' to '%s'"), "latin1", ""); #endif utf8_obj = obj; } obj = utf8_obj; } R_AllocStringBuffer(0, &cbuff); top_of_loop: inbuf = ans; inb = strlen(inbuf); outbuf = cbuff.data; outb = cbuff.bufsize - 1; /* First initialize output */ Riconv (obj, NULL, NULL, &outbuf, &outb); next_char: /* Then convert input */ res = Riconv(obj, &inbuf , &inb, &outbuf, &outb); if(res == -1 && errno == E2BIG) { R_AllocStringBuffer(2*cbuff.bufsize, &cbuff); goto top_of_loop; } else if(res == -1 && (errno == EILSEQ || errno == EINVAL)) { if(outb < 13) { R_AllocStringBuffer(2*cbuff.bufsize, &cbuff); goto top_of_loop; } if (ienc == CE_UTF8) { /* if starting in UTF-8, use \uxxxx */ /* This must be the first byte */ size_t clen; wchar_t wc; clen = utf8toucs(&wc, inbuf); if(clen > 0 && inb >= clen) { inbuf += clen; inb -= clen; # ifndef Win32 if((unsigned int) wc < 65536) { # endif snprintf(outbuf, 9, "<U+%04X>", (unsigned int) wc); outbuf += 8; outb -= 8; # ifndef Win32 } else { snprintf(outbuf, 13, "<U+%08X>", (unsigned int) wc); outbuf += 12; outb -= 12; } # endif } else { snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf); outbuf += 4; outb -= 4; inbuf++; inb--; } } else { snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf); outbuf += 4; outb -= 4; inbuf++; inb--; } goto next_char; } *outbuf = '\0'; SEXP Sans = install(cbuff.data); R_FreeStringBuffer(&cbuff); return Sans; }
void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int lowmax, int uppmax) // col is >0 and <=ncol-1 if this range of [xlow,xupp] and [ilow,iupp] match up to but not including that column // lowmax=1 if xlowIn is the lower bound of this group (needed for roll) // uppmax=1 if xuppIn is the upper bound of this group (needed for roll) { int xlow=xlowIn, xupp=xuppIn, ilow=ilowIn, iupp=iuppIn, j, k, ir, lir, tmp; ir = lir = ilow + (iupp-ilow)/2; // lir = logical i row. if (o) ir = o[lir]-1; // ir = the actual i row if i were ordered ic = VECTOR_ELT(i,icols[col]-1); // ic = i column xc = VECTOR_ELT(x,xcols[col]-1); // xc = x column // it was checked in bmerge() that the types are equal switch (TYPEOF(xc)) { case LGLSXP : case INTSXP : // including factors ival.i = INTEGER(ic)[ir]; while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; // Same as (xlow+xupp)/2 but without risk of overflow xval.i = INTEGER(xc)[mid]; if (xval.i<ival.i) { // relies on NA_INTEGER == INT_MIN, tested in init.c xlow=mid; } else if (xval.i>ival.i) { // TO DO: is *(&xlow, &xupp)[0|1]=mid more efficient than branch? xupp=mid; } else { // xval.i == ival.i including NA_INTEGER==NA_INTEGER // branch mid to find start and end of this group in this column // TO DO?: not if mult=first|last and col<ncol-1 tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.i = INTEGER(xc)[mid]; if (xval.i == ival.i) tmplow=mid; else xupp=mid; } while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.i = INTEGER(xc)[mid]; if (xval.i == ival.i) tmpupp=mid; else xlow=mid; } // xlow and xupp now surround the group in xc, we only need this range for the next column break; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { // TO DO: could double up from lir rather than halving from iupp mid = tmplow + (iupp-tmplow)/2; xval.i = INTEGER(ic)[ o ? o[mid]-1 : mid ]; // reuse xval to search in i if (xval.i == ival.i) tmplow=mid; else iupp=mid; } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.i = INTEGER(ic)[ o ? o[mid]-1 : mid ]; if (xval.i == ival.i) tmpupp=mid; else ilow=mid; } // ilow and iupp now surround the group in ic, too break; case STRSXP : ival.s = STRING_ELT(ic,ir); while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; xval.s = STRING_ELT(xc,mid); if (enc_warn && (ENC_KNOWN(ival.s) || ENC_KNOWN(xval.s))) { // The || is only done here to avoid the warning message being repeating in this code. warning("A known encoding (latin1 or UTF-8) was detected in a join column. data.table compares the bytes currently, so doesn't support *mixed* encodings well; i.e., using both latin1 and UTF-8, or if any unknown encodings are non-ascii and some of those are marked known and others not. But if either latin1 or UTF-8 is used exclusively, and all unknown encodings are ascii, then the result should be ok. In future we will check for you and avoid this warning if everything is ok. The tricky part is doing this without impacting performance for ascii-only cases."); // TO DO: check and warn in forder whether any strings are non-ascii (>127) but unknown encoding // check in forder whether both latin1 and UTF-8 have been used // See bugs #5159 and #5266 and related #5295 to revisit enc_warn = FALSE; // just warn once } tmp = StrCmp(xval.s, ival.s); // uses pointer equality first, NA_STRING are allowed and joined to, then uses strcmp on CHAR(). if (tmp == 0) { // TO DO: deal with mixed encodings and locale optionally tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.s = STRING_ELT(xc,mid); if (ival.s == xval.s) tmplow=mid; else xupp=mid; // the == here assumes (within this column) no mixing of latin1 and UTF-8, and no unknown non-ascii } // TO DO: add checks to forder, see above. while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.s = STRING_ELT(xc,mid); if (ival.s == xval.s) tmpupp=mid; else xlow=mid; // see above re == } break; } else if (tmp < 0) { xlow=mid; } else { xupp=mid; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { mid = tmplow + (iupp-tmplow)/2; xval.s = STRING_ELT(ic, o ? o[mid]-1 : mid); if (xval.s == ival.s) tmplow=mid; else iupp=mid; // see above re == } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.s = STRING_ELT(ic, o ? o[mid]-1 : mid); if (xval.s == ival.s) tmpupp=mid; else ilow=mid; // see above re == } break; case REALSXP : class = getAttrib(xc, R_ClassSymbol); twiddle = (isString(class) && STRING_ELT(class, 0)==char_integer64) ? &i64twiddle : &dtwiddle; ival.ll = twiddle(DATAPTR(ic), ir, 1); while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; xval.ll = twiddle(DATAPTR(xc), mid, 1); if (xval.ll<ival.ll) { xlow=mid; } else if (xval.ll>ival.ll) { xupp=mid; } else { // xval.ll == ival.ll) tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.ll = twiddle(DATAPTR(xc), mid, 1); if (xval.ll == ival.ll) tmplow=mid; else xupp=mid; } while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.ll = twiddle(DATAPTR(xc), mid, 1); if (xval.ll == ival.ll) tmpupp=mid; else xlow=mid; } break; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { mid = tmplow + (iupp-tmplow)/2; xval.ll = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 ); if (xval.ll == ival.ll) tmplow=mid; else iupp=mid; } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.ll = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 ); if (xval.ll == ival.ll) tmpupp=mid; else ilow=mid; } break; default: error("Type '%s' not supported as key column", type2char(TYPEOF(xc))); } if (xlow<xupp-1) { // if value found, low and upp surround it, unlike standard binary search where low falls on it if (col<ncol-1) bmerge_r(xlow, xupp, ilow, iupp, col+1, 1, 1); // final two 1's are lowmax and uppmax else { int len = xupp-xlow-1; if (len>1) allLen1[0] = FALSE; for (j=ilow+1; j<iupp; j++) { // usually iterates once only for j=ir if (o) k=o[j]-1; else k=j; retFirst[k] = xlow+2; // extra +1 for 1-based indexing at R level retLength[k]= len; } } } else if (roll!=0.0 && col==ncol-1) { // runs once per i row (not each search test), so not hugely time critical if (xlow != xupp-1 || xlow<xlowIn || xupp>xuppIn) error("Internal error: xlow!=xupp-1 || xlow<xlowIn || xupp>xuppIn"); if (nearest) { // value of roll ignored currently when nearest if ( (!lowmax || xlow>xlowIn) && (!uppmax || xupp<xuppIn) ) { if ( ( TYPEOF(ic)==REALSXP && REAL(ic)[ir]-REAL(xc)[xlow] <= REAL(xc)[xupp]-REAL(ic)[ir] ) || ( TYPEOF(ic)<=INTSXP && INTEGER(ic)[ir]-INTEGER(xc)[xlow] <= INTEGER(xc)[xupp]-INTEGER(ic)[ir] )) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else { retFirst[ir] = xupp+1; retLength[ir] = 1; } } else if (uppmax && xupp==xuppIn && rollends[1]) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else if (lowmax && xlow==xlowIn && rollends[0]) { retFirst[ir] = xupp+1; retLength[ir] = 1; } } else { if ( ( (roll>0.0 && (!lowmax || xlow>xlowIn) && (xupp<xuppIn || !uppmax || rollends[1])) || (roll<0.0 && xupp==xuppIn && uppmax && rollends[1]) ) && ( (TYPEOF(ic)==REALSXP && REAL(ic)[ir]-REAL(xc)[xlow]-rollabs<1e-6) || (TYPEOF(ic)<=INTSXP && (double)(INTEGER(ic)[ir]-INTEGER(xc)[xlow])-rollabs<1e-6 ) || (TYPEOF(ic)==STRSXP) )) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else if ( ( (roll<0.0 && (!uppmax || xupp<xuppIn) && (xlow>xlowIn || !lowmax || rollends[0])) || (roll>0.0 && xlow==xlowIn && lowmax && rollends[0]) ) && ( (TYPEOF(ic)==REALSXP && REAL(xc)[xupp]-REAL(ic)[ir]-rollabs<1e-6) || (TYPEOF(ic)<=INTSXP && (double)(INTEGER(xc)[xupp]-INTEGER(ic)[ir])-rollabs<1e-6 ) || (TYPEOF(ic)==STRSXP) )) { retFirst[ir] = xupp+1; // == xlow+2 retLength[ir] = 1; } } if (iupp-ilow > 2 && retFirst[ir]!=NA_INTEGER) { // >=2 equal values in the last column being rolling to the same point. for (j=ilow+1; j<iupp; j++) { // will rewrite retFirst[ir] to itself, but that's ok if (o) k=o[j]-1; else k=j; retFirst[k] = retFirst[ir]; retLength[k]= retLength[ir]; } } } if (ilow>ilowIn && (xlow>xlowIn || (roll!=0.0 && col==ncol-1))) bmerge_r(xlowIn, xlow+1, ilowIn, ilow+1, col, lowmax, uppmax && xlow+1==xuppIn); if (iupp<iuppIn && (xupp<xuppIn || (roll!=0.0 && col==ncol-1))) bmerge_r(xupp-1, xuppIn, iupp-1, iuppIn, col, lowmax && xupp-1==xlowIn, uppmax); }
/* "do_parse" - the user interface input/output to files. The internal R_Parse.. functions are defined in ./gram.y (-> gram.c) .Internal( parse(file, n, text, prompt, srcfile, encoding) ) If there is text then that is read and the other arguments are ignored. */ SEXP attribute_hidden do_parse(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP text, prompt, s, source; Rconnection con; Rboolean wasopen, old_latin1 = known_to_be_latin1, old_utf8 = known_to_be_utf8, allKnown = TRUE; int ifile, num, i; const char *encoding; ParseStatus status; checkArity(op, args); R_ParseError = 0; R_ParseErrorMsg[0] = '\0'; ifile = asInteger(CAR(args)); args = CDR(args); con = getConnection(ifile); wasopen = con->isopen; num = asInteger(CAR(args)); args = CDR(args); if (num == 0) return(allocVector(EXPRSXP, 0)); PROTECT(text = coerceVector(CAR(args), STRSXP)); if(length(CAR(args)) && !length(text)) errorcall(call, _("coercion of 'text' to character was unsuccessful")); args = CDR(args); prompt = CAR(args); args = CDR(args); source = CAR(args); args = CDR(args); if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1) error(_("invalid '%s' value"), "encoding"); encoding = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */ known_to_be_latin1 = known_to_be_utf8 = FALSE; /* allow 'encoding' to override declaration on 'text'. */ if(streql(encoding, "latin1")) { known_to_be_latin1 = TRUE; allKnown = FALSE; } else if(streql(encoding, "UTF-8")) { known_to_be_utf8 = TRUE; allKnown = FALSE; } else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc")) warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding); if (prompt == R_NilValue) PROTECT(prompt); else PROTECT(prompt = coerceVector(prompt, STRSXP)); if (length(text) > 0) { /* If 'text' has known encoding then we can be sure it will be correctly re-encoded to the current encoding by translateChar in the parser and so could mark the result in a Latin-1 or UTF-8 locale. A small complication is that different elements could have different encodings, but all that matters is that all non-ASCII elements have known encoding. */ for(i = 0; i < length(text); i++) if(!ENC_KNOWN(STRING_ELT(text, i)) && !IS_ASCII(STRING_ELT(text, i))) { allKnown = FALSE; break; } if(allKnown) { known_to_be_latin1 = old_latin1; known_to_be_utf8 = old_utf8; } if (num == NA_INTEGER) num = -1; s = R_ParseVector(text, num, &status, source); if (status != PARSE_OK) parseError(call, R_ParseError); } else if (ifile >= 3) {/* file != "" */ if (num == NA_INTEGER) num = -1; try { if(!wasopen && !con->open(con)) error(_("cannot open the connection")); if(!con->canread) error(_("cannot read from this connection")); s = R_ParseConn(con, num, &status, source); if(!wasopen) con->close(con); } catch (...) { if (!wasopen && con->isopen) con->close(con); throw; } if (status != PARSE_OK) parseError(call, R_ParseError); } else { if (num == NA_INTEGER) num = 1; s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source); if (status != PARSE_OK) parseError(call, R_ParseError); } UNPROTECT(2); known_to_be_latin1 = old_latin1; known_to_be_utf8 = old_utf8; return s; }
/* Note that NA_STRING is not handled separately here. This is deliberate -- see ?paste -- and implicitly coerces it to "NA" */ SEXP attribute_hidden do_paste(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP ans, collapse, sep, x; int sepw, u_sepw, ienc; R_xlen_t i, j, k, maxlen, nx, pwidth; const char *s, *cbuf, *csep=NULL, *u_csep=NULL; char *buf; Rboolean allKnown, anyKnown, use_UTF8, use_Bytes, sepASCII = TRUE, sepUTF8 = FALSE, sepBytes = FALSE, sepKnown = FALSE, use_sep = (PRIMVAL(op) == 0); const void *vmax; checkArity(op, args); /* We use formatting and so we must initialize printing. */ PrintDefaults(); /* Check the arguments */ x = CAR(args); if (!isVectorList(x)) error(_("invalid first argument")); nx = xlength(x); if(use_sep) { /* paste(..., sep, .) */ sep = CADR(args); if (!isString(sep) || LENGTH(sep) <= 0 || STRING_ELT(sep, 0) == NA_STRING) error(_("invalid separator")); sep = STRING_ELT(sep, 0); csep = translateChar(sep); u_sepw = sepw = (int) strlen(csep); // will be short sepASCII = strIsASCII(csep); sepKnown = ENC_KNOWN(sep) > 0; sepUTF8 = IS_UTF8(sep); sepBytes = IS_BYTES(sep); collapse = CADDR(args); } else { /* paste0(..., .) */ u_sepw = sepw = 0; sep = R_NilValue;/* -Wall */ collapse = CADR(args); } if (!isNull(collapse)) if(!isString(collapse) || LENGTH(collapse) <= 0 || STRING_ELT(collapse, 0) == NA_STRING) error(_("invalid '%s' argument"), "collapse"); if(nx == 0) return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0); /* Maximum argument length, coerce if needed */ maxlen = 0; for (j = 0; j < nx; j++) { if (!isString(VECTOR_ELT(x, j))) { /* formerly in R code: moved to C for speed */ SEXP call, xj = VECTOR_ELT(x, j); if(OBJECT(xj)) { /* method dispatch */ PROTECT(call = lang2(install("as.character"), xj)); SET_VECTOR_ELT(x, j, eval(call, env)); UNPROTECT(1); } else if (isSymbol(xj)) SET_VECTOR_ELT(x, j, ScalarString(PRINTNAME(xj))); else SET_VECTOR_ELT(x, j, coerceVector(xj, STRSXP)); if (!isString(VECTOR_ELT(x, j))) error(_("non-string argument to internal 'paste'")); } if(xlength(VECTOR_ELT(x, j)) > maxlen) maxlen = xlength(VECTOR_ELT(x, j)); } if(maxlen == 0) return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0); PROTECT(ans = allocVector(STRSXP, maxlen)); for (i = 0; i < maxlen; i++) { /* Strategy for marking the encoding: if all inputs (including * the separator) are ASCII, so is the output and we don't * need to mark. Otherwise if all non-ASCII inputs are of * declared encoding, we should mark. * Need to be careful only to include separator if it is used. */ anyKnown = FALSE; allKnown = TRUE; use_UTF8 = FALSE; use_Bytes = FALSE; if(nx > 1) { allKnown = sepKnown || sepASCII; anyKnown = sepKnown; use_UTF8 = sepUTF8; use_Bytes = sepBytes; } pwidth = 0; for (j = 0; j < nx; j++) { k = xlength(VECTOR_ELT(x, j)); if (k > 0) { SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k); if(IS_UTF8(cs)) use_UTF8 = TRUE; if(IS_BYTES(cs)) use_Bytes = TRUE; } } if (use_Bytes) use_UTF8 = FALSE; vmax = vmaxget(); for (j = 0; j < nx; j++) { k = xlength(VECTOR_ELT(x, j)); if (k > 0) { if(use_Bytes) pwidth += strlen(CHAR(STRING_ELT(VECTOR_ELT(x, j), i % k))); else if(use_UTF8) pwidth += strlen(translateCharUTF8(STRING_ELT(VECTOR_ELT(x, j), i % k))); else pwidth += strlen(translateChar(STRING_ELT(VECTOR_ELT(x, j), i % k))); vmaxset(vmax); } } if(use_sep) { if (use_UTF8 && !u_csep) { u_csep = translateCharUTF8(sep); u_sepw = (int) strlen(u_csep); // will be short } pwidth += (nx - 1) * (use_UTF8 ? u_sepw : sepw); } if (pwidth > INT_MAX) error(_("result would exceed 2^31-1 bytes")); cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff); vmax = vmaxget(); for (j = 0; j < nx; j++) { k = xlength(VECTOR_ELT(x, j)); if (k > 0) { SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k); if (use_UTF8) { s = translateCharUTF8(cs); strcpy(buf, s); buf += strlen(s); } else { s = use_Bytes ? CHAR(cs) : translateChar(cs); strcpy(buf, s); buf += strlen(s); allKnown = allKnown && (strIsASCII(s) || (ENC_KNOWN(cs)> 0)); anyKnown = anyKnown || (ENC_KNOWN(cs)> 0); } } if (sepw != 0 && j != nx - 1) { if (use_UTF8) { strcpy(buf, u_csep); buf += u_sepw; } else { strcpy(buf, csep); buf += sepw; } } vmax = vmaxget(); } ienc = 0; if(use_UTF8) ienc = CE_UTF8; else if(use_Bytes) ienc = CE_BYTES; else if(anyKnown && allKnown) { if(known_to_be_latin1) ienc = CE_LATIN1; if(known_to_be_utf8) ienc = CE_UTF8; } SET_STRING_ELT(ans, i, mkCharCE(cbuf, ienc)); } /* Now collapse, if required. */ if(collapse != R_NilValue && (nx = XLENGTH(ans)) > 0) { sep = STRING_ELT(collapse, 0); use_UTF8 = IS_UTF8(sep); use_Bytes = IS_BYTES(sep); for (i = 0; i < nx; i++) { if(IS_UTF8(STRING_ELT(ans, i))) use_UTF8 = TRUE; if(IS_BYTES(STRING_ELT(ans, i))) use_Bytes = TRUE; } if(use_Bytes) { csep = CHAR(sep); use_UTF8 = FALSE; } else if(use_UTF8) csep = translateCharUTF8(sep); else csep = translateChar(sep); sepw = (int) strlen(csep); anyKnown = ENC_KNOWN(sep) > 0; allKnown = anyKnown || strIsASCII(csep); pwidth = 0; vmax = vmaxget(); for (i = 0; i < nx; i++) if(use_UTF8) { pwidth += strlen(translateCharUTF8(STRING_ELT(ans, i))); vmaxset(vmax); } else /* already translated */ pwidth += strlen(CHAR(STRING_ELT(ans, i))); pwidth += (nx - 1) * sepw; if (pwidth > INT_MAX) error(_("result would exceed 2^31-1 bytes")); cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff); vmax = vmaxget(); for (i = 0; i < nx; i++) { if(i > 0) { strcpy(buf, csep); buf += sepw; } if(use_UTF8) s = translateCharUTF8(STRING_ELT(ans, i)); else /* already translated */ s = CHAR(STRING_ELT(ans, i)); strcpy(buf, s); while (*buf) buf++; allKnown = allKnown && (strIsASCII(s) || (ENC_KNOWN(STRING_ELT(ans, i)) > 0)); anyKnown = anyKnown || (ENC_KNOWN(STRING_ELT(ans, i)) > 0); if(use_UTF8) vmaxset(vmax); } UNPROTECT(1); ienc = CE_NATIVE; if(use_UTF8) ienc = CE_UTF8; else if(use_Bytes) ienc = CE_BYTES; else if(anyKnown && allKnown) { if(known_to_be_latin1) ienc = CE_LATIN1; if(known_to_be_utf8) ienc = CE_UTF8; } PROTECT(ans = allocVector(STRSXP, 1)); SET_STRING_ELT(ans, 0, mkCharCE(cbuf, ienc)); } R_FreeStringBufferL(&cbuff); UNPROTECT(1); return ans; }