/* Operates on three CHARSXP (internal) arguments. Returns STRSXP vector. * Returned strings alternate between "outside" and "inside" blocks. * E.g. "Four .(score) and seven .(years)" * returns c("Four ", "score", " and seven ", "years") */ SEXP find_subst_expressions(SEXP str, SEXP begin_delim, SEXP end_delim) { const char *s, *b, *e; const char *p, *start; const char *before, *begin, *end, *after; cetype_t enc; step_t step; int nBlocks, i; SEXP out; assert_type(str, CHARSXP); assert_type(begin_delim, CHARSXP); assert_type(end_delim, CHARSXP); step = get_stepper(str, &s, &enc); if (getCharCE(begin_delim) != enc) { b = Rf_reEnc(CHAR(begin_delim), getCharCE(begin_delim), enc, 0); } else { b = CHAR(begin_delim); } if (getCharCE(end_delim) != enc) { e = Rf_reEnc(CHAR(end_delim), getCharCE(end_delim), enc, 0); } else { e = CHAR(end_delim); } /* Scan once to count how many blocks we need, then scan again (ugh) */ nBlocks = 0; p = s; while(p && *p) { if ( (p = block_search(p, b, e, NULL, NULL, NULL, NULL, step)) ) { nBlocks++; } } out = PROTECT(allocVector(STRSXP, 2*nBlocks+1)); start = s; p = s; for (i = 0; i < nBlocks; i++) { p = block_search(p, b, e, &before, &begin, &end, &after, step); /* extract leading unescaped block, then escaped block */ if (p) { SET_STRING_ELT(out, 2*i, mkCharLenCE(start, before-start, enc)); SET_STRING_ELT(out, 2*i+1, mkCharLenCE(begin, end-begin, enc)); start = after; } } /* then the rest of the string. */ SET_STRING_ELT(out, 2*i, mkCharCE(start, enc)); UNPROTECT(1); return out; }
SEXP R_stri_length(SEXP vec) { int vec_len = LENGTH(vec); SEXP ret = PROTECT(allocVector(INTSXP, vec_len)); int* retint = INTEGER(ret); for (int i = 0; i < vec_len; i++) { SEXP str = STRING_ELT(vec, i); if (str == NA_STRING) { retint[i] = NA_INTEGER; continue; } int str_len = LENGTH(str); if (getCharCE(str) == CE_LATIN1 || (getCharCE(str) == CE_NATIVE && getNativeCE() == CE_LATIN1)) { retint[i] = str_len; } else if (getCharCE(str) == CE_BYTES) { UNPROTECT(1); error("Invalid encoding: bytes."); } else if (getCharCE(str) == CE_UTF8 || (getCharCE(str) == CE_NATIVE && getNativeCE() == CE_UTF8)) { UChar32 out = 0; const char* source = CHAR(str); R_len_t j = 0; int count; for (count = 0; out >= 0 && j < str_len; count++) { U8_NEXT(source, j, str_len, out); // faster that U8_FWD_1 & gives bad UChar32s } if (out < 0) { warning("Invalid UTF8 string: %s", source); retint[i] = NA_INTEGER; } else { retint[i] = count; } } else if (native_is_singlebyte()) { // native-8bit retint[i] = str_len; } else { // native encoding, not 8 bit UErrorCode status = U_ZERO_ERROR; UConverter* conv = ucnv_open(NULL, &status); const char* source = CHAR(str); const char* sourceLimit = source + str_len; int j; for (j = 0; source != sourceLimit; j++) { ucnv_getNextUChar(conv, &source, sourceLimit, &status); } retint[i] = j; // all right, we got it! } } UNPROTECT(1); return ret; }
/* Uses R_alloc but called by a .Internal. Result may be R_alloc-ed */ static const char *trChar(SEXP x) { size_t n = strlen(CHAR(x)); cetype_t ienc = getCharCE(x); if (ienc == CE_BYTES) { const char *p = CHAR(x), *q; char *pp = R_alloc(4*n+1, 1), *qq = pp, buf[5]; for (q = p; *q; q++) { unsigned char k = (unsigned char) *q; if (k >= 0x20 && k < 0x80) { *qq++ = *q; } else { snprintf(buf, 5, "\\x%02x", k); for(int j = 0; j < 4; j++) *qq++ = buf[j]; } } *qq = '\0'; return pp; } else { #ifdef Win32 static char buf[106]; char *p; /* Long strings will be rare, and few per cat() call so we can afford to be profligate here: translateChar is */ if (n < 100) p = buf; else p = R_alloc(n+7, 1); if (WinUTF8out && ienc == CE_UTF8) { strcpy(p, UTF8in); strcat(p, CHAR(x)); strcat(p, UTF8out); return p; } else #endif return translateChar(x); } }
SEXP attribute_hidden do_substr(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP s, x, sa, so, el; R_xlen_t i, len; int start, stop, k, l; size_t slen; cetype_t ienc; const char *ss; char *buf; checkArity(op, args); x = CAR(args); sa = CADR(args); so = CADDR(args); k = LENGTH(sa); l = LENGTH(so); if (!isString(x)) error(_("extracting substrings from a non-character object")); len = XLENGTH(x); PROTECT(s = allocVector(STRSXP, len)); if (len > 0) { if (!isInteger(sa) || !isInteger(so) || k == 0 || l == 0) error(_("invalid substring arguments")); for (i = 0; i < len; i++) { start = INTEGER(sa)[i % k]; stop = INTEGER(so)[i % l]; el = STRING_ELT(x,i); if (el == NA_STRING || start == NA_INTEGER || stop == NA_INTEGER) { SET_STRING_ELT(s, i, NA_STRING); continue; } ienc = getCharCE(el); ss = CHAR(el); slen = strlen(ss); /* FIXME -- should handle embedded nuls */ buf = R_AllocStringBuffer(slen+1, &cbuff); if (start < 1) start = 1; if (start > stop || start > slen) { buf[0] = '\0'; } else { if (stop > slen) stop = (int) slen; substr(buf, ss, ienc, start, stop); } SET_STRING_ELT(s, i, mkCharCE(buf, ienc)); } R_FreeStringBufferL(&cbuff); } DUPLICATE_ATTRIB(s, x); /* This copied the class, if any */ UNPROTECT(1); return s; }
/* Match what EncodeString does with encodings */ attribute_hidden int Rstrlen(SEXP s, int quote) { cetype_t ienc = getCharCE(s); if (ienc == CE_UTF8 || ienc == CE_BYTES) return Rstrwid(CHAR(s), LENGTH(s), ienc, quote); const void *vmax = vmaxget(); const char *p = translateChar(s); int len = Rstrwid(p, (int)strlen(p), CE_NATIVE, quote); vmaxset(vmax); return len; }
// formerly in src/main/platform.c SEXP fileedit(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP fn, ti, ed; const char **f, **title, *editor; int i, n; const void *vmax = vmaxget(); args = CDR(args); fn = CAR(args); args = CDR(args); ti = CAR(args); args = CDR(args); ed = CAR(args); n = length(fn); if (!isString(ed) || length(ed) != 1) error(_("invalid '%s' specification"), "editor"); if (n > 0) { if (!isString(fn)) error(_("invalid '%s' specification"), "filename"); f = (const char**) R_alloc(n, sizeof(char*)); title = (const char**) R_alloc(n, sizeof(char*)); /* FIXME convert to UTF-8 on Windows */ for (i = 0; i < n; i++) { SEXP el = STRING_ELT(fn, 0); if (!isNull(el)) #ifdef Win32 f[i] = acopy_string(reEnc(CHAR(el), getCharCE(el), CE_UTF8, 1)); #else f[i] = acopy_string(translateChar(el)); #endif else f[i] = ""; if (!isNull(STRING_ELT(ti, i))) title[i] = acopy_string(translateChar(STRING_ELT(ti, i))); else title[i] = ""; } }
/* utils::shortPathName */ SEXP in_shortpath(SEXP paths) { SEXP ans, el; int i, n = LENGTH(paths); char tmp[MAX_PATH]; wchar_t wtmp[32768]; DWORD res; const void *vmax = vmaxget(); if(!isString(paths)) error(_("'path' must be a character vector")); PROTECT(ans = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { el = STRING_ELT(paths, i); if(getCharCE(el) == CE_UTF8) { res = GetShortPathNameW(filenameToWchar(el, FALSE), wtmp, 32768); if (res && res <= 32768) wcstoutf8(tmp, wtmp, wcslen(wtmp)+1); else strcpy(tmp, translateChar(el)); /* documented to return paths using \, which the API call does not necessarily do */ R_fixbackslash(tmp); SET_STRING_ELT(ans, i, mkCharCE(tmp, CE_UTF8)); } else { res = GetShortPathName(translateChar(el), tmp, MAX_PATH); if (res == 0 || res > MAX_PATH) strcpy(tmp, translateChar(el)); /* documented to return paths using \, which the API call does not necessarily do */ R_fixbackslash(tmp); SET_STRING_ELT(ans, i, mkChar(tmp)); } } UNPROTECT(1); vmaxset(vmax); return ans; }
/* GetNativeSystemInfo is XP or later */ pGNSI = (PGNSI) GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); if(NULL != pGNSI) pGNSI(&si); else GetSystemInfo(&si); if(si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) strcat(ver, " x64"); } SET_STRING_ELT(ans, 1, mkChar(ver)); if((int)osvi.dwMajorVersion >= 5) { if(osvi.wServicePackMajor > 0) snprintf(ver, 256, "build %d, Service Pack %d", LOWORD(osvi.dwBuildNumber), (int) osvi.wServicePackMajor); else snprintf(ver, 256, "build %d", LOWORD(osvi.dwBuildNumber)); } else snprintf(ver, 256, "build %d, %s", LOWORD(osvi.dwBuildNumber), osvi.szCSDVersion); SET_STRING_ELT(ans, 2, mkChar(ver)); GetComputerNameW(name, &namelen); wcstoutf8(buf, name, 1000); SET_STRING_ELT(ans, 3, mkCharCE(buf, CE_UTF8)); #ifdef WIN64 SET_STRING_ELT(ans, 4, mkChar("x86-64")); #else SET_STRING_ELT(ans, 4, mkChar("x86")); #endif GetUserNameW(user, &userlen); wcstoutf8(buf, user, 1000); SET_STRING_ELT(ans, 5, mkCharCE(buf, CE_UTF8)); SET_STRING_ELT(ans, 6, STRING_ELT(ans, 5)); SET_STRING_ELT(ans, 7, STRING_ELT(ans, 5)); PROTECT(ansnames = allocVector(STRSXP, 8)); SET_STRING_ELT(ansnames, 0, mkChar("sysname")); SET_STRING_ELT(ansnames, 1, mkChar("release")); SET_STRING_ELT(ansnames, 2, mkChar("version")); SET_STRING_ELT(ansnames, 3, mkChar("nodename")); SET_STRING_ELT(ansnames, 4, mkChar("machine")); SET_STRING_ELT(ansnames, 5, mkChar("login")); SET_STRING_ELT(ansnames, 6, mkChar("user")); SET_STRING_ELT(ansnames, 7, mkChar("effective_user")); setAttrib(ans, R_NamesSymbol, ansnames); UNPROTECT(2); return ans; } SEXP do_syssleep(SEXP call, SEXP op, SEXP args, SEXP rho) { DWORD mtime; int ntime; double time; checkArity(op, args); time = asReal(CAR(args)); if (ISNAN(time) || time < 0) errorcall(call, _("invalid '%s' value"), "time"); ntime = 1000*(time) + 0.5; while (ntime > 0) { mtime = min(500, ntime); ntime -= mtime; Sleep(mtime); R_ProcessEvents(); } return R_NilValue; } #ifdef LEA_MALLOC #define MALLINFO_FIELD_TYPE size_t struct mallinfo { MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ MALLINFO_FIELD_TYPE smblks; /* number of fastbin blocks */ MALLINFO_FIELD_TYPE hblks; /* number of mmapped regions */ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ MALLINFO_FIELD_TYPE fsmblks; /* space available in freed fastbin blocks */ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ MALLINFO_FIELD_TYPE fordblks; /* total free space */ MALLINFO_FIELD_TYPE keepcost; /* top-most, releasable (via malloc_trim) space */ }; extern R_size_t R_max_memory; struct mallinfo mallinfo(void); #endif SEXP in_memsize(SEXP ssize) { SEXP ans; int maxmem = NA_LOGICAL; if(isLogical(ssize)) maxmem = asLogical(ssize); else if(isReal(ssize)) { R_size_t newmax; double mem = asReal(ssize); if (!R_FINITE(mem)) error(_("incorrect argument")); #ifdef LEA_MALLOC #ifndef WIN64 if(mem >= 4096) error(_("don't be silly!: your machine has a 4Gb address limit")); #endif newmax = mem * 1048576.0; if (newmax < R_max_memory) warning(_("cannot decrease memory limit: ignored")); else R_max_memory = newmax; #endif } else error(_("incorrect argument")); PROTECT(ans = allocVector(REALSXP, 1)); #ifdef LEA_MALLOC if(maxmem == NA_LOGICAL) REAL(ans)[0] = R_max_memory; else if(maxmem) REAL(ans)[0] = mallinfo().usmblks; else REAL(ans)[0] = mallinfo().uordblks; REAL(ans)[0] /= 1048576.0; #else REAL(ans)[0] = NA_REAL; #endif UNPROTECT(1); return ans; } SEXP do_dllversion(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP path = R_NilValue, ans; const wchar_t *dll; DWORD dwVerInfoSize; DWORD dwVerHnd; checkArity(op, args); path = CAR(args); if(!isString(path) || LENGTH(path) != 1) errorcall(call, _("invalid '%s' argument"), "path"); dll = filenameToWchar(STRING_ELT(path, 0), FALSE); dwVerInfoSize = GetFileVersionInfoSizeW(dll, &dwVerHnd); PROTECT(ans = allocVector(STRSXP, 2)); SET_STRING_ELT(ans, 0, mkChar("")); SET_STRING_ELT(ans, 1, mkChar("")); if (dwVerInfoSize) { BOOL fRet; LPSTR lpstrVffInfo; LPSTR lszVer = NULL; UINT cchVer = 0; lpstrVffInfo = (LPSTR) malloc(dwVerInfoSize); if (GetFileVersionInfoW(dll, 0L, dwVerInfoSize, lpstrVffInfo)) { fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\FileVersion"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 0, mkChar(lszVer)); fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\R Version"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer)); else { fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\Compiled under R Version"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer)); } } else ans = R_NilValue; free(lpstrVffInfo); } else ans = R_NilValue; UNPROTECT(1); return ans; } int Rwin_rename(const char *from, const char *to) { return (MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0); } int Rwin_wrename(const wchar_t *from, const wchar_t *to) { return (MoveFileExW(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0); } const char *formatError(DWORD res) { static char buf[1000], *p; FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, res, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, 1000, NULL); p = buf+strlen(buf) -1; if(*p == '\n') *p = '\0'; p = buf+strlen(buf) -1; if(*p == '\r') *p = '\0'; p = buf+strlen(buf) -1; if(*p == '.') *p = '\0'; return buf; } void R_UTF8fixslash(char *s); /* from main/util.c */ SEXP do_normalizepath(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, paths = CAR(args), el, slash; int i, n = LENGTH(paths), res; char tmp[MAX_PATH], longpath[MAX_PATH], *tmp2; wchar_t wtmp[32768], wlongpath[32768], *wtmp2; int mustWork, fslash = 0; checkArity(op, args); if(!isString(paths)) errorcall(call, _("'path' must be a character vector")); slash = CADR(args); if(!isString(slash) || LENGTH(slash) != 1) errorcall(call, "'winslash' must be a character string"); const char *sl = CHAR(STRING_ELT(slash, 0)); if (strcmp(sl, "/") && strcmp(sl, "\\")) errorcall(call, "'winslash' must be '/' or '\\\\'"); if (strcmp(sl, "/") == 0) fslash = 1; mustWork = asLogical(CADDR(args)); PROTECT(ans = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { int warn = 0; SEXP result; el = STRING_ELT(paths, i); result = el; if(getCharCE(el) == CE_UTF8) { if ((res = GetFullPathNameW(filenameToWchar(el, FALSE), 32768, wtmp, &wtmp2)) && res <= 32768) { if ((res = GetLongPathNameW(wtmp, wlongpath, 32768)) && res <= 32768) { wcstoutf8(longpath, wlongpath, wcslen(wlongpath)+1); if(fslash) R_UTF8fixslash(longpath); result = mkCharCE(longpath, CE_UTF8); } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { wcstoutf8(tmp, wtmp, wcslen(wtmp)+1); if(fslash) R_UTF8fixslash(tmp); result = mkCharCE(tmp, CE_UTF8); warn = 1; } } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if (fslash) { strcpy(tmp, translateCharUTF8(el)); R_UTF8fixslash(tmp); result = mkCharCE(tmp, CE_UTF8); } warn = 1; } if (warn && (mustWork == NA_LOGICAL)) warningcall(call, "path[%d]=\"%ls\": %s", i+1, filenameToWchar(el,FALSE), formatError(GetLastError())); } else { if ((res = GetFullPathName(translateChar(el), MAX_PATH, tmp, &tmp2)) && res <= MAX_PATH) { if ((res = GetLongPathName(tmp, longpath, MAX_PATH)) && res <= MAX_PATH) { if(fslash) R_fixslash(longpath); result = mkChar(longpath); } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if(fslash) R_fixslash(tmp); result = mkChar(tmp); warn = 1; } } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if (fslash) { strcpy(tmp, translateChar(el)); R_fixslash(tmp); result = mkChar(tmp); } warn = 1; } if (warn && (mustWork == NA_LOGICAL)) warningcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } SET_STRING_ELT(ans, i, result); } UNPROTECT(1); return ans; }
const char *EncodeString(SEXP s, int w, int quote, Rprt_adj justify) { int b, b0, i, j, cnt; const char *p; char *q, buf[11]; cetype_t ienc = CE_NATIVE; /* We have to do something like this as the result is returned, and passed on by EncodeElement -- so no way could be end user be responsible for freeing it. However, this is not thread-safe. */ static R_StringBuffer gBuffer = {NULL, 0, BUFSIZE}; R_StringBuffer *buffer = &gBuffer; if (s == NA_STRING) { p = quote ? CHAR(R_print.na_string) : CHAR(R_print.na_string_noquote); cnt = i = (int)(quote ? strlen(CHAR(R_print.na_string)) : strlen(CHAR(R_print.na_string_noquote))); quote = 0; } else { #ifdef Win32 if(WinUTF8out) { ienc = getCharCE(s); if(ienc == CE_UTF8) { p = CHAR(s); i = Rstrlen(s, quote); cnt = LENGTH(s); } else { p = translateChar0(s); if(p == CHAR(s)) { i = Rstrlen(s, quote); cnt = LENGTH(s); } else { cnt = strlen(p); i = Rstrwid(p, cnt, CE_NATIVE, quote); } ienc = CE_NATIVE; } } else #endif { if(IS_BYTES(s)) { p = CHAR(s); cnt = (int) strlen(p); const char *q; char *pp = R_alloc(4*cnt+1, 1), *qq = pp, buf[5]; for (q = p; *q; q++) { unsigned char k = (unsigned char) *q; if (k >= 0x20 && k < 0x80) { *qq++ = *q; if (quote && *q == '"') cnt++; } else { snprintf(buf, 5, "\\x%02x", k); for(j = 0; j < 4; j++) *qq++ = buf[j]; cnt += 3; } } *qq = '\0'; p = pp; i = cnt; } else { p = translateChar(s); if(p == CHAR(s)) { i = Rstrlen(s, quote); cnt = LENGTH(s); } else { cnt = (int) strlen(p); i = Rstrwid(p, cnt, CE_NATIVE, quote); } } } } /* We need enough space for the encoded string, including escapes. Octal encoding turns one byte into four. \u encoding can turn a multibyte into six or ten, but it turns 2/3 into 6, and 4 (and perhaps 5/6) into 10. Let's be wasteful here (the worst case appears to be an MBCS with one byte for an upper-plane Unicode point output as ten bytes, but I doubt that such an MBCS exists: two bytes is plausible). +2 allows for quotes, +6 for UTF_8 escapes. */ q = R_AllocStringBuffer(imax2(5*cnt+8, w), buffer); b = w - i - (quote ? 2 : 0); /* total amount of padding */ if(justify == Rprt_adj_none) b = 0; if(b > 0 && justify != Rprt_adj_left) { b0 = (justify == Rprt_adj_centre) ? b/2 : b; for(i = 0 ; i < b0 ; i++) *q++ = ' '; b -= b0; } if(quote) *q++ = (char) quote; if(mbcslocale || ienc == CE_UTF8) { int j, res; mbstate_t mb_st; wchar_t wc; unsigned int k; /* not wint_t as it might be signed */ #ifndef __STDC_ISO_10646__ Rboolean Unicode_warning = FALSE; #endif if(ienc != CE_UTF8) mbs_init(&mb_st); #ifdef Win32 else if(WinUTF8out) { memcpy(q, UTF8in, 3); q += 3; } #endif for (i = 0; i < cnt; i++) { res = (int)((ienc == CE_UTF8) ? utf8toucs(&wc, p): mbrtowc(&wc, p, MB_CUR_MAX, NULL)); if(res >= 0) { /* res = 0 is a terminator */ k = wc; /* To be portable, treat \0 explicitly */ if(res == 0) {k = 0; wc = L'\0';} if(0x20 <= k && k < 0x7f && iswprint(wc)) { switch(wc) { case L'\\': *q++ = '\\'; *q++ = '\\'; p++; break; case L'\'': case L'"': if(quote == *p) *q++ = '\\'; *q++ = *p++; break; default: for(j = 0; j < res; j++) *q++ = *p++; break; } } else if (k < 0x80) { /* ANSI Escapes */ switch(wc) { case L'\a': *q++ = '\\'; *q++ = 'a'; break; case L'\b': *q++ = '\\'; *q++ = 'b'; break; case L'\f': *q++ = '\\'; *q++ = 'f'; break; case L'\n': *q++ = '\\'; *q++ = 'n'; break; case L'\r': *q++ = '\\'; *q++ = 'r'; break; case L'\t': *q++ = '\\'; *q++ = 't'; break; case L'\v': *q++ = '\\'; *q++ = 'v'; break; case L'\0': *q++ = '\\'; *q++ = '0'; break; default: /* print in octal */ snprintf(buf, 5, "\\%03o", k); for(j = 0; j < 4; j++) *q++ = buf[j]; break; } p++; } else { if(iswprint(wc)) { /* The problem here is that wc may be printable according to the Unicode tables, but it may not be printable on the output device concerned. */ for(j = 0; j < res; j++) *q++ = *p++; } else { #ifndef Win32 # ifndef __STDC_ISO_10646__ Unicode_warning = TRUE; # endif if(k > 0xffff) snprintf(buf, 11, "\\U%08x", k); else #endif snprintf(buf, 11, "\\u%04x", k); j = (int) strlen(buf); memcpy(q, buf, j); q += j; p += res; } i += (res - 1); } } else { /* invalid char */ snprintf(q, 5, "\\x%02x", *((unsigned char *)p)); q += 4; p++; } } #ifndef __STDC_ISO_10646__ if(Unicode_warning) warning(_("it is not known that wchar_t is Unicode on this platform")); #endif } else for (i = 0; i < cnt; i++) { /* ASCII */ if((unsigned char) *p < 0x80) { if(*p != '\t' && isprint((int)*p)) { /* Windows has \t as printable */ switch(*p) { case '\\': *q++ = '\\'; *q++ = '\\'; break; case '\'': case '"': if(quote == *p) *q++ = '\\'; *q++ = *p; break; default: *q++ = *p; break; } } else switch(*p) { /* ANSI Escapes */ case '\a': *q++ = '\\'; *q++ = 'a'; break; case '\b': *q++ = '\\'; *q++ = 'b'; break; case '\f': *q++ = '\\'; *q++ = 'f'; break; case '\n': *q++ = '\\'; *q++ = 'n'; break; case '\r': *q++ = '\\'; *q++ = 'r'; break; case '\t': *q++ = '\\'; *q++ = 't'; break; case '\v': *q++ = '\\'; *q++ = 'v'; break; case '\0': *q++ = '\\'; *q++ = '0'; break; default: /* print in octal */ snprintf(buf, 5, "\\%03o", (unsigned char) *p); for(j = 0; j < 4; j++) *q++ = buf[j]; break; } p++; } else { /* 8 bit char */ #ifdef Win32 /* It seems Windows does not know what is printable! */ *q++ = *p++; #else if(!isprint((int)*p & 0xff)) { /* print in octal */ snprintf(buf, 5, "\\%03o", (unsigned char) *p); for(j = 0; j < 4; j++) *q++ = buf[j]; p++; } else *q++ = *p++; #endif } } #ifdef Win32 if(WinUTF8out && ienc == CE_UTF8) { memcpy(q, UTF8out, 3); q += 3; } #endif if(quote) *q++ = (char) quote; if(b > 0 && justify != Rprt_adj_right) { for(i = 0 ; i < b ; i++) *q++ = ' '; } *q = '\0'; return buffer->data; }
SEXP attribute_hidden do_substrgets(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP s, x, sa, so, value, el, v_el; R_xlen_t i, len; int start, stop, k, l, v; size_t slen; cetype_t ienc, venc; const char *ss, *v_ss; char *buf; const void *vmax; checkArity(op, args); x = CAR(args); sa = CADR(args); so = CADDR(args); value = CADDDR(args); k = LENGTH(sa); l = LENGTH(so); if (!isString(x)) error(_("replacing substrings in a non-character object")); len = LENGTH(x); PROTECT(s = allocVector(STRSXP, len)); if (len > 0) { if (!isInteger(sa) || !isInteger(so) || k == 0 || l == 0) error(_("invalid substring arguments")); v = LENGTH(value); if (!isString(value) || v == 0) error(_("invalid value")); vmax = vmaxget(); for (i = 0; i < len; i++) { el = STRING_ELT(x, i); v_el = STRING_ELT(value, i % v); start = INTEGER(sa)[i % k]; stop = INTEGER(so)[i % l]; if (el == NA_STRING || v_el == NA_STRING || start == NA_INTEGER || stop == NA_INTEGER) { SET_STRING_ELT(s, i, NA_STRING); continue; } ienc = getCharCE(el); ss = CHAR(el); slen = strlen(ss); if (start < 1) start = 1; if (stop > slen) stop = (int) slen; /* SBCS optimization */ if (start > stop) { /* just copy element across */ SET_STRING_ELT(s, i, STRING_ELT(x, i)); } else { int ienc2 = ienc; v_ss = CHAR(v_el); /* is the value in the same encoding? FIXME: could prefer UTF-8 here */ venc = getCharCE(v_el); if (venc != ienc && !strIsASCII(v_ss)) { ss = translateChar(el); slen = strlen(ss); v_ss = translateChar(v_el); ienc2 = CE_NATIVE; } /* might expand under MBCS */ buf = R_AllocStringBuffer(slen+strlen(v_ss), &cbuff); strcpy(buf, ss); substrset(buf, v_ss, ienc2, start, stop); SET_STRING_ELT(s, i, mkCharCE(buf, ienc2)); } vmaxset(vmax); } R_FreeStringBufferL(&cbuff); } UNPROTECT(1); return s; }
SEXP do_system(SEXP call, SEXP op, SEXP args, SEXP rho) { rpipe *fp; char buf[INTERN_BUFSIZE]; const char *fout = "", *ferr = ""; int vis = 0, flag = 2, i = 0, j, ll = 0; SEXP cmd, fin, Stdout, Stderr, tlist = R_NilValue, tchar, rval; int timeout = 0, timedout = 0; checkArity(op, args); cmd = CAR(args); if (!isString(cmd) || LENGTH(cmd) != 1) errorcall(call, _("character string expected as first argument")); args = CDR(args); flag = asInteger(CAR(args)); args = CDR(args); if (flag >= 20) {vis = -1; flag -= 20;} else if (flag >= 10) {vis = 0; flag -= 10;} else vis = 1; fin = CAR(args); if (!isString(fin)) errorcall(call, _("character string expected as third argument")); args = CDR(args); Stdout = CAR(args); args = CDR(args); Stderr = CAR(args); args = CDR(args); timeout = asInteger(CAR(args)); if (timeout == NA_INTEGER || timeout < 0 || timeout > 2000000) /* the limit could be increased, but not much as in milliseconds it has to fit into a 32-bit unsigned integer */ errorcall(call, _("invalid '%s' argument"), "timeout"); if (timeout && !flag) errorcall(call, "Timeout with background running processes is not supported."); if (CharacterMode == RGui) { /* This is a rather conservative approach: if Rgui is launched from a console window it does have standard handles -- but users might well not expect that. */ SetStdHandle(STD_INPUT_HANDLE, INVALID_HANDLE_VALUE); SetStdHandle(STD_OUTPUT_HANDLE, INVALID_HANDLE_VALUE); SetStdHandle(STD_ERROR_HANDLE, INVALID_HANDLE_VALUE); if (TYPEOF(Stdout) == STRSXP) fout = CHAR(STRING_ELT(Stdout, 0)); if (TYPEOF(Stderr) == STRSXP) ferr = CHAR(STRING_ELT(Stderr, 0)); } else { if (flag == 2) flag = 1; /* ignore std.output.on.console */ if (TYPEOF(Stdout) == STRSXP) fout = CHAR(STRING_ELT(Stdout, 0)); else if (asLogical(Stdout) == 0) fout = NULL; if (TYPEOF(Stderr) == STRSXP) ferr = CHAR(STRING_ELT(Stderr, 0)); else if (asLogical(Stderr) == 0) ferr = NULL; } if (flag < 2) { /* Neither intern = TRUE nor show.output.on.console for Rgui */ ll = runcmd_timeout(CHAR(STRING_ELT(cmd, 0)), getCharCE(STRING_ELT(cmd, 0)), flag, vis, CHAR(STRING_ELT(fin, 0)), fout, ferr, timeout, &timedout); } else { /* read stdout +/- stderr from pipe */ int m = 0; if(flag == 2 /* show on console */ || CharacterMode == RGui) m = 3; if(TYPEOF(Stderr) == LGLSXP) m = asLogical(Stderr) ? 2 : 0; if(m && TYPEOF(Stdout) == LGLSXP && asLogical(Stdout)) m = 3; fp = rpipeOpen(CHAR(STRING_ELT(cmd, 0)), getCharCE(STRING_ELT(cmd, 0)), vis, CHAR(STRING_ELT(fin, 0)), m, fout, ferr, timeout); if (!fp) { /* If intern = TRUE generate an error */ if (flag == 3) error(runerror()); ll = NOLAUNCH; } else { /* FIXME: use REPROTECT */ if (flag == 3) { PROTECT(tlist); /* honour intern = FALSE, ignore.stdout = TRUE */ if (m > 0 || (!(TYPEOF(Stdout) == LGLSXP && !asLogical(Stdout)))) for (i = 0; rpipeGets(fp, buf, INTERN_BUFSIZE); i++) { ll = strlen(buf) - 1; if ((ll >= 0) && (buf[ll] == '\n')) buf[ll] = '\0'; tchar = mkChar(buf); UNPROTECT(1); /* tlist */ PROTECT(tlist = CONS(tchar, tlist)); } } else { for (i = 0; rpipeGets(fp, buf, INTERN_BUFSIZE); i++) R_WriteConsole(buf, strlen(buf)); } ll = rpipeClose(fp, &timedout); } } if (timedout) { ll = 124; warningcall(R_NilValue, _("command '%s' timed out"), CHAR(STRING_ELT(cmd, 0))); } else if (flag == 3 && ll) { warningcall(R_NilValue, _("running command '%s' had status %d"), CHAR(STRING_ELT(cmd, 0)), ll); } if (flag == 3) { /* intern = TRUE: convert pairlist to list */ PROTECT(rval = allocVector(STRSXP, i)); for (j = (i - 1); j >= 0; j--) { SET_STRING_ELT(rval, j, CAR(tlist)); tlist = CDR(tlist); } if(ll) { SEXP lsym = install("status"); setAttrib(rval, lsym, ScalarInteger(ll)); } UNPROTECT(2); return rval; } else { rval = ScalarInteger(ll); R_Visible = 0; return rval; } }
SEXP getfmts(SEXP format) { int cnt, v, nfmt; char fmt[MAXLINE+1], bit[MAXLINE+1]; const char *formatString; size_t n, cur, chunk, maxlen = 0; int nthis, nstar; Rboolean use_UTF8; SEXP res = PROTECT(allocVector(STRSXP, MAXNARGS)); #define SET_RESULT(n, s) { \ if (n >= MAXNARGS) error(_("only %d arguments are allowed"), MAXNARGS); \ maxlen = (n) < maxlen ? maxlen : (n) + 1; \ SET_STRING_ELT(res, (n), mkChar(s)); \ } if (!isString(format)) error(_("'fmt' is not a character vector")); nfmt = LENGTH(format); if (nfmt != 1) error(_("'fmt' must be length 1")); use_UTF8 = getCharCE(STRING_ELT(format, 0)) == CE_UTF8; formatString = TRANSLATE_CHAR(format, 0); n = strlen(formatString); if (n > MAXLINE) error(_("'fmt' length exceeds maximal format length %d"), MAXLINE); /* process the format string */ for (cur = 0, cnt = 0; cur < n; cur += chunk) { const char *curFormat = formatString + cur; char *starc; if (formatString[cur] == '%') { /* handle special format command */ if (cur < n - 1 && formatString[cur + 1] == '%') { /* take care of %% in the format */ chunk = 2; strcpy(bit, "%"); } else { /* recognise selected types from Table B-1 of K&R */ /* NB: we deal with "%%" in branch above. */ /* This is MBCS-OK, as we are in a format spec */ /* Include formats c, u, p and n as well as the R formats; this needs to match */ /* C code as well */ chunk = strcspn(curFormat + 1, "diosfeEgGxXaAcupn") + 2; if (cur + chunk > n) error(_("unrecognised format specification '%s'"), curFormat); strncpy(fmt, curFormat, chunk); fmt[chunk] = '\0'; nthis = -1; /* now look for %n$ or %nn$ form */ if (strlen(fmt) > 3 && fmt[1] >= '1' && fmt[1] <= '9') { v = fmt[1] - '0'; if(fmt[2] == '$') { nthis = v-1; memmove(fmt+1, fmt+3, strlen(fmt)-2); } else if(fmt[2] >= '0' && fmt[2] <= '9' && fmt[3] == '$') { v = 10*v + fmt[2] - '0'; nthis = v-1; memmove(fmt+1, fmt+4, strlen(fmt)-3); } } starc = Rf_strchr(fmt, '*'); if (starc) { /* handle * format if present */ nstar = -1; if (strlen(starc) > 3 && starc[1] >= '1' && starc[1] <= '9') { v = starc[1] - '0'; if(starc[2] == '$') { nstar = v-1; memmove(starc+1, starc+3, strlen(starc)-2); } else if(starc[2] >= '0' && starc[2] <= '9' && starc[3] == '$') { v = 10*v + starc[2] - '0'; nstar = v-1; memmove(starc+1, starc+4, strlen(starc)-3); } } if(nstar < 0) { nstar = cnt++; } if (Rf_strchr(starc+1, '*')) error(_("at most one asterisk '*' is supported in each conversion specification")); SET_RESULT(nstar, "*"); } if (fmt[strlen(fmt) - 1] == '%') { } else { if(nthis < 0) { nthis = cnt++; } SET_RESULT(nthis, fmt); } } } else { /* not '%' : handle string part */ char *ch = Rf_strchr(curFormat, '%'); /* MBCS-aware version used */ chunk = (ch) ? (size_t) (ch - curFormat) : strlen(curFormat); strncpy(bit, curFormat, chunk); bit[chunk] = '\0'; } } /* end for ( each chunk ) */ res = xlengthgets(res, maxlen); UNPROTECT(1); return res; }
// count strings x, their prefixes or suffixes // and return counts greater than lower. // // note that for string counting the number of // nodes usually will be much greater than the // number of result strings. however, we could // not determine a better upper bound without // traversing the tree. SEXP R_utf8CountString(SEXP x, SEXP R_n, SEXP R_lower, SEXP R_type, SEXP R_verbose, SEXP R_persistent, SEXP R_useBytes) { if (!persistent && rpn) { cpnfree(rpn); rpn = 0; warning("cleaning up stale state"); } if (isNull(x) || TYPEOF(x) != VECSXP) error("'x' not of type list"); if (isNull(R_n) || TYPEOF(R_n) != INTSXP) error("'n' not of type integer"); if (isNull(R_lower) || TYPEOF(R_lower) != INTSXP) error("'lower' not of type integer"); if (isNull(R_type) || TYPEOF(R_type) != INTSXP) error("'type' not of type integer"); if (isNull(R_verbose) || TYPEOF(R_verbose) != LGLSXP) error("'verbose' not of type logical"); if (isNull(R_persistent) || TYPEOF(R_persistent) != LGLSXP) error("'persistent' not of type logical"); if (isNull(R_useBytes) || TYPEOF(R_useBytes) != LGLSXP) error("'useBytes' not of type logical"); long l, n = 0; int h, i, j, k, type; const unsigned char *c; SEXP r, s; if (!persistent) { known_to_be_utf8 = utf8locale(); known_to_be_latin1 = latin1locale(); use_bytes = *LOGICAL(R_useBytes); } else if (use_bytes != *LOGICAL(R_useBytes)) error("change of useBytes in persistent mode"); else if (known_to_be_utf8 != utf8locale() || known_to_be_latin1 != latin1locale()) error_reset("change of locale in persistent mode"); persistent = LOGICAL(R_persistent)[0]; if (!persistent) { tcnt = INTEGER(R_lower)[0]; if (tcnt < 0) error_reset("'lower' invalid value"); } type = INTEGER(R_type)[0]; switch (type) { case 0: // strings inc = 0; break; case 1: // prefixes case 2: // suffixes case 3: n = INTEGER(R_n)[0]; if (n < 0) error_reset("'n' invalid value"); if (n == 0) return R_NilValue; inc = 1; break; default: error_reset("'type' invalid value"); } #ifdef _TIME_H clock_t t2, t1, t0 = clock(); if (LOGICAL(R_verbose)[0] == TRUE) { Rprintf("counting ..."); #ifdef __DEBUG Rprintf("\n"); #endif } #endif nap = 0; for (i = 0; i < LENGTH(x); i++) { r = VECTOR_ELT(x, i); if (TYPEOF(r) != STRSXP) error_reset("not of type character"); for (j = 0; j < LENGTH(r); j++) { s = STRING_ELT(r, j); l = LENGTH(s); if (s == NA_STRING || !l) continue; #ifdef __TRANSLATE if (!use_bytes) { c = (const unsigned char *) translateChar(s); l = strlen((const char *) c); } else c = (const unsigned char *) CHAR(s); #else c = (const unsigned char *) CHAR(s); #endif if (!use_bytes && known_to_be_utf8 && tau_pcre_valid_utf8(c, l) >= 0) error_reset("not a valid UTF-8 string"); if (type > 1) { if (reverse_copy_utf8(c, l, n) >= 0) error_reset("cannot copy string to buffer"); } else { if (type < 1) n = l; h = 0; for (k = 0; k < l; k++) { if (c[k] == '\0') continue; if (k < __CBUF_SIZE - 1) cbuf[k] = c[k]; else error_reset("cannot copy string to buffer"); if (use_bytes || !known_to_be_utf8 || (c[k] & 0xC0) != 0x80) { h++; if (h > n) break; } } cbuf[k] = 0; } #ifdef __DEBUG Rprintf(" %s\n", cbuf); #endif h = nap + 1; lpn = 0; rpn = cpncount(rpn, cbuf); if (nap != h) error_reset("cannot add string to tree"); if (!inc) { if (lpn) // should never be NULL lpn->count++; } } R_CheckUserInterrupt(); } #ifdef _TIME_H t1 = clock(); if (LOGICAL(R_verbose)[0] == TRUE) { Rprintf(" %i string(s) using %i nodes [%.2fs]\n", nap, ncpn, ((double) t1 - t0) / CLOCKS_PER_SEC); if (!persistent) Rprintf("writing ..."); #ifdef __DEBUG Rprintf("\n"); #endif } #endif if (persistent) return R_NilValue; nap = enc = 0; rval = PROTECT(allocVector(INTSXP, ncpn)); setAttrib(rval, R_NamesSymbol, (nval = allocVector(STRSXP, ncpn))); cpnretprefix(rpn, 0); if (ncpn) { cpnfree(rpn); rpn = 0; error_reset("cannot retrieve count(s)"); } rpn = 0; // reverse the reversed strings if (type == 2) for (i = 0; i < nap; i++) { s = STRING_ELT(nval, i); reverse_copy_utf8((const unsigned char *) CHAR(s), LENGTH(s), -1); SET_STRING_ELT(nval, i, mkCharCE((const char *) cbuf, getCharCE(s))); } #ifdef _TIME_H t2 = clock(); if (LOGICAL(R_verbose)[0] == TRUE) Rprintf(" %i strings [%.2fs]\n", nap, ((double) t2 - t1) / CLOCKS_PER_SEC); #endif // reduce if (nap < LENGTH(rval)) { r = PROTECT(allocVector(INTSXP, nap)); setAttrib(r, R_NamesSymbol, (s = allocVector(STRSXP, nap))); while (nap-- > 0) { INTEGER(r)[nap] = INTEGER(rval)[nap]; SET_STRING_ELT(s, nap, STRING_ELT(nval, nap)); } UNPROTECT(2); return r; } UNPROTECT(1); return rval; }
/* Calculate the bounding rectangle for a string. * x and y assumed to be in INCHES. */ void textRect(double x, double y, SEXP text, int i, const pGEcontext gc, double xadj, double yadj, double rot, pGEDevDesc dd, LRect *r) { /* NOTE that we must work in inches for the angles to be correct */ LLocation bl, br, tr, tl; LLocation tbl, tbr, ttr, ttl; LTransform thisLocation, thisRotation, thisJustification; LTransform tempTransform, transform; double w, h; if (isExpression(text)) { SEXP expr = VECTOR_ELT(text, i % LENGTH(text)); w = fromDeviceWidth(GEExpressionWidth(expr, gc, dd), GE_INCHES, dd); h = fromDeviceHeight(GEExpressionHeight(expr, gc, dd), GE_INCHES, dd); } else { const char* string = CHAR(STRING_ELT(text, i % LENGTH(text))); w = fromDeviceWidth(GEStrWidth(string, (gc->fontface == 5) ? CE_SYMBOL : getCharCE(STRING_ELT(text, i % LENGTH(text))), gc, dd), GE_INCHES, dd); h = fromDeviceHeight(GEStrHeight(string, (gc->fontface == 5) ? CE_SYMBOL : getCharCE(STRING_ELT(text, i % LENGTH(text))), gc, dd), GE_INCHES, dd); } location(0, 0, bl); location(w, 0, br); location(w, h, tr); location(0, h, tl); translation(-xadj*w, -yadj*h, thisJustification); translation(x, y, thisLocation); if (rot != 0) rotation(rot, thisRotation); else identity(thisRotation); /* Position relative to origin of rotation THEN rotate. */ multiply(thisJustification, thisRotation, tempTransform); /* Translate to (x, y) */ multiply(tempTransform, thisLocation, transform); trans(bl, transform, tbl); trans(br, transform, tbr); trans(tr, transform, ttr); trans(tl, transform, ttl); rect(locationX(tbl), locationX(tbr), locationX(ttr), locationX(ttl), locationY(tbl), locationY(tbr), locationY(ttr), locationY(ttl), r); /* For debugging, the following prints out an R statement to draw the * bounding box */ /* Rprintf("\ngrid.lines(c(%f, %f, %f, %f, %f), c(%f, %f, %f, %f, %f), default.units=\"inches\")\n", locationX(tbl), locationX(tbr), locationX(ttr), locationX(ttl), locationX(tbl), locationY(tbl), locationY(tbr), locationY(ttr), locationY(ttl), locationY(tbl) ); */ }
SEXP attribute_hidden do_sprintf(SEXP call, SEXP op, SEXP args, SEXP env) { int i, nargs, cnt, v, thislen, nfmt, nprotect = 0; /* fmt2 is a copy of fmt with '*' expanded. bit will hold numeric formats and %<w>s, so be quite small. */ char fmt[MAXLINE+1], fmt2[MAXLINE+10], *fmtp, bit[MAXLINE+1], *outputString; const char *formatString; size_t n, cur, chunk; SEXP format, _this, a[MAXNARGS], ans /* -Wall */ = R_NilValue; int ns, maxlen, lens[MAXNARGS], nthis, nstar, star_arg = 0; static R_StringBuffer outbuff = {NULL, 0, MAXELTSIZE}; Rboolean has_star, use_UTF8; #define _my_sprintf(_X_) \ { \ int nc = snprintf(bit, MAXLINE+1, fmtp, _X_); \ if (nc > MAXLINE) \ error(_("required resulting string length %d is greater than maximal %d"), \ nc, MAXLINE); \ } nargs = length(args); /* grab the format string */ format = CAR(args); if (!isString(format)) error(_("'fmt' is not a character vector")); nfmt = length(format); if (nfmt == 0) return allocVector(STRSXP, 0); args = CDR(args); nargs--; if(nargs >= MAXNARGS) error(_("only %d arguments are allowed"), MAXNARGS); /* record the args for possible coercion and later re-ordering */ for(i = 0; i < nargs; i++, args = CDR(args)) { SEXPTYPE t_ai; a[i] = CAR(args); if((t_ai = TYPEOF(a[i])) == LANGSXP || t_ai == SYMSXP) /* << maybe add more .. */ error(_("invalid type of argument[%d]: '%s'"), i+1, CHAR(type2str(t_ai))); lens[i] = length(a[i]); if(lens[i] == 0) return allocVector(STRSXP, 0); } #define CHECK_maxlen \ maxlen = nfmt; \ for(i = 0; i < nargs; i++) \ if(maxlen < lens[i]) maxlen = lens[i]; \ if(maxlen % nfmt) \ error(_("arguments cannot be recycled to the same length")); \ for(i = 0; i < nargs; i++) \ if(maxlen % lens[i]) \ error(_("arguments cannot be recycled to the same length")) CHECK_maxlen; outputString = R_AllocStringBuffer(0, &outbuff); /* We do the format analysis a row at a time */ for(ns = 0; ns < maxlen; ns++) { outputString[0] = '\0'; use_UTF8 = getCharCE(STRING_ELT(format, ns % nfmt)) == CE_UTF8; if (!use_UTF8) { for(i = 0; i < nargs; i++) { if (!isString(a[i])) continue; if (getCharCE(STRING_ELT(a[i], ns % lens[i])) == CE_UTF8) { use_UTF8 = TRUE; break; } } } formatString = TRANSLATE_CHAR(format, ns % nfmt); n = strlen(formatString); if (n > MAXLINE) error(_("'fmt' length exceeds maximal format length %d"), MAXLINE); /* process the format string */ for (cur = 0, cnt = 0; cur < n; cur += chunk) { const char *curFormat = formatString + cur, *ss; char *starc; ss = NULL; if (formatString[cur] == '%') { /* handle special format command */ if (cur < n - 1 && formatString[cur + 1] == '%') { /* take care of %% in the format */ chunk = 2; strcpy(bit, "%"); } else { /* recognise selected types from Table B-1 of K&R */ /* NB: we deal with "%%" in branch above. */ /* This is MBCS-OK, as we are in a format spec */ chunk = strcspn(curFormat + 1, "diosfeEgGxXaA") + 2; if (cur + chunk > n) error(_("unrecognised format specification '%s'"), curFormat); strncpy(fmt, curFormat, chunk); fmt[chunk] = '\0'; nthis = -1; /* now look for %n$ or %nn$ form */ if (strlen(fmt) > 3 && fmt[1] >= '1' && fmt[1] <= '9') { v = fmt[1] - '0'; if(fmt[2] == '$') { if(v > nargs) error(_("reference to non-existent argument %d"), v); nthis = v-1; memmove(fmt+1, fmt+3, strlen(fmt)-2); } else if(fmt[2] >= '0' && fmt[2] <= '9' && fmt[3] == '$') { v = 10*v + fmt[2] - '0'; if(v > nargs) error(_("reference to non-existent argument %d"), v); nthis = v-1; memmove(fmt+1, fmt+4, strlen(fmt)-3); } } starc = Rf_strchr(fmt, '*'); if (starc) { /* handle * format if present */ nstar = -1; if (strlen(starc) > 3 && starc[1] >= '1' && starc[1] <= '9') { v = starc[1] - '0'; if(starc[2] == '$') { if(v > nargs) error(_("reference to non-existent argument %d"), v); nstar = v-1; memmove(starc+1, starc+3, strlen(starc)-2); } else if(starc[2] >= '0' && starc[2] <= '9' && starc[3] == '$') { v = 10*v + starc[2] - '0'; if(v > nargs) error(_("reference to non-existent argument %d"), v); nstar = v-1; memmove(starc+1, starc+4, strlen(starc)-3); } } if(nstar < 0) { if (cnt >= nargs) error(_("too few arguments")); nstar = cnt++; } if (Rf_strchr(starc+1, '*')) error(_("at most one asterisk '*' is supported in each conversion specification")); _this = a[nstar]; if(ns == 0 && TYPEOF(_this) == REALSXP) { _this = coerceVector(_this, INTSXP); PROTECT(a[nstar] = _this); nprotect++; } if(TYPEOF(_this) != INTSXP || LENGTH(_this)<1 || INTEGER(_this)[ns % LENGTH(_this)] == NA_INTEGER) error(_("argument for '*' conversion specification must be a number")); star_arg = INTEGER(_this)[ns % LENGTH(_this)]; has_star = TRUE; } else has_star = FALSE; if (fmt[strlen(fmt) - 1] == '%') { /* handle % with formatting options */ if (has_star) snprintf(bit, MAXLINE+1, fmt, star_arg); else strcpy(bit, fmt); /* was sprintf(..) for which some compiler warn */ } else { Rboolean did_this = FALSE; if(nthis < 0) { if (cnt >= nargs) error(_("too few arguments")); nthis = cnt++; } _this = a[nthis]; if (has_star) { size_t nf; char *p, *q = fmt2; for (p = fmt; *p; p++) if (*p == '*') q += sprintf(q, "%d", star_arg); else *q++ = *p; *q = '\0'; nf = strlen(fmt2); if (nf > MAXLINE) error(_("'fmt' length exceeds maximal format length %d"), MAXLINE); fmtp = fmt2; } else fmtp = fmt; #define CHECK_this_length \ PROTECT(_this); \ thislen = length(_this); \ if(thislen == 0) \ error(_("coercion has changed vector length to 0")) /* Now let us see if some minimal coercion would be sensible, but only do so once, for ns = 0: */ if(ns == 0) { SEXP tmp; Rboolean do_check; switch(*findspec(fmtp)) { case 'd': case 'i': case 'o': case 'x': case 'X': if(TYPEOF(_this) == REALSXP) { double r = REAL(_this)[0]; if((double)((int) r) == r) _this = coerceVector(_this, INTSXP); PROTECT(a[nthis] = _this); nprotect++; } break; case 'a': case 'A': case 'e': case 'f': case 'g': case 'E': case 'G': if(TYPEOF(_this) != REALSXP && /* no automatic as.double(<string>) : */ TYPEOF(_this) != STRSXP) { PROTECT(tmp = lang2(install("as.double"), _this)); #define COERCE_THIS_TO_A \ _this = eval(tmp, env); \ UNPROTECT(1); \ PROTECT(a[nthis] = _this); \ nprotect++; \ did_this = TRUE; \ CHECK_this_length; \ do_check = (lens[nthis] == maxlen); \ lens[nthis] = thislen; /* may have changed! */ \ if(do_check && thislen < maxlen) { \ CHECK_maxlen; \ } COERCE_THIS_TO_A } break; case 's': if(TYPEOF(_this) != STRSXP) { /* as.character method might call sprintf() */ size_t nc = strlen(outputString); char *z = Calloc(nc+1, char); strcpy(z, outputString); PROTECT(tmp = lang2(install("as.character"), _this)); COERCE_THIS_TO_A strcpy(outputString, z); Free(z); } break; default: break; } } /* ns == 0 (first-time only) */ if(!did_this) CHECK_this_length; switch(TYPEOF(_this)) { case LGLSXP: { int x = LOGICAL(_this)[ns % thislen]; if (checkfmt(fmtp, "di")) error(_("invalid format '%s'; %s"), fmtp, _("use format %d or %i for logical objects")); if (x == NA_LOGICAL) { fmtp[strlen(fmtp)-1] = 's'; _my_sprintf("NA") } else { _my_sprintf(x) } break; } case INTSXP: { int x = INTEGER(_this)[ns % thislen]; if (checkfmt(fmtp, "dioxX")) error(_("invalid format '%s'; %s"), fmtp, _("use format %d, %i, %o, %x or %X for integer objects")); if (x == NA_INTEGER) { fmtp[strlen(fmtp)-1] = 's'; _my_sprintf("NA") } else { _my_sprintf(x) } break; }
/* returns string from a CHARSXP making sure that the result is in UTF-8 */ const char *rj_char_utf8(SEXP s) { if (Rf_getCharCE(s) == CE_UTF8) return CHAR(s); return Rf_reEnc(CHAR(s), getCharCE(s), CE_UTF8, 0); /* subst. invalid chars: 1=hex, 2=., 3=?, other=skip */ }
attribute_hidden int Rstrlen(SEXP s, int quote) { return Rstrwid(CHAR(s), LENGTH(s), getCharCE(s), quote); }
SEXP installTrChar(SEXP x) { void * obj; const char *inbuf, *ans = CHAR(x); char *outbuf; size_t inb, outb, res; cetype_t ienc = getCharCE(x); R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE}; if(TYPEOF(x) != CHARSXP) error(_("'%s' must be called on a CHARSXP"), "installTrChar"); if(x == NA_STRING || !(ENC_KNOWN(x))) return install(ans); if(IS_BYTES(x)) error(_("translating strings with \"bytes\" encoding is not allowed")); if(utf8locale && IS_UTF8(x)) return install(ans); if(latin1locale && IS_LATIN1(x)) return install(ans); if(IS_ASCII(x)) return install(ans); if(IS_LATIN1(x)) { if(!latin1_obj) { obj = Riconv_open("", "latin1"); /* should never happen */ if(obj == (void *)(-1)) #ifdef Win32 error(_("unsupported conversion from '%s' in codepage %d"), "latin1", localeCP); #else error(_("unsupported conversion from '%s' to '%s'"), "latin1", ""); #endif latin1_obj = obj; } obj = latin1_obj; } else { if(!utf8_obj) { obj = Riconv_open("", "UTF-8"); /* should never happen */ if(obj == (void *)(-1)) #ifdef Win32 error(_("unsupported conversion from '%s' in codepage %d"), "latin1", localeCP); #else error(_("unsupported conversion from '%s' to '%s'"), "latin1", ""); #endif utf8_obj = obj; } obj = utf8_obj; } R_AllocStringBuffer(0, &cbuff); top_of_loop: inbuf = ans; inb = strlen(inbuf); outbuf = cbuff.data; outb = cbuff.bufsize - 1; /* First initialize output */ Riconv (obj, NULL, NULL, &outbuf, &outb); next_char: /* Then convert input */ res = Riconv(obj, &inbuf , &inb, &outbuf, &outb); if(res == -1 && errno == E2BIG) { R_AllocStringBuffer(2*cbuff.bufsize, &cbuff); goto top_of_loop; } else if(res == -1 && (errno == EILSEQ || errno == EINVAL)) { if(outb < 13) { R_AllocStringBuffer(2*cbuff.bufsize, &cbuff); goto top_of_loop; } if (ienc == CE_UTF8) { /* if starting in UTF-8, use \uxxxx */ /* This must be the first byte */ size_t clen; wchar_t wc; clen = utf8toucs(&wc, inbuf); if(clen > 0 && inb >= clen) { inbuf += clen; inb -= clen; # ifndef Win32 if((unsigned int) wc < 65536) { # endif snprintf(outbuf, 9, "<U+%04X>", (unsigned int) wc); outbuf += 8; outb -= 8; # ifndef Win32 } else { snprintf(outbuf, 13, "<U+%08X>", (unsigned int) wc); outbuf += 12; outb -= 12; } # endif } else { snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf); outbuf += 4; outb -= 4; inbuf++; inb--; } } else { snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf); outbuf += 4; outb -= 4; inbuf++; inb--; } goto next_char; } *outbuf = '\0'; SEXP Sans = install(cbuff.data); R_FreeStringBuffer(&cbuff); return Sans; }