/*
 * Convert the NUL-terminated UTF-8 string 's' to wide characters.
 *
 *   wc  destination buffer, or NULL to only count the characters in 's'
 *   s   UTF-8 input
 *   n   capacity of 'wc' in wide characters (not consulted when wc is NULL)
 *
 * Returns the number of characters converted, not counting the terminator.
 * Raises an R error (via Rf_error) when utf8toucs() reports a negative
 * length for any character.
 *
 * The capacity is tested *before* each decode, so a call with n <= 0 and a
 * non-NULL 'wc' writes nothing and returns 0 (previously one element was
 * decoded into wc[0] regardless of n).
 */
static int utf8towcs(wchar_t *wc, const char *s, int n)
{
    ssize_t m, res = 0;
    const char *t = s;

    if (wc) {
        wchar_t *p = wc;

        while (res < n) {
            m = (ssize_t) utf8toucs(p, t);
            if (m < 0)
                Rf_error("invalid input '%s' in 'utf8towcs'", s);
            if (m == 0)         /* reached the terminator */
                break;
            res++;
            p++;
            t += m;
        }
    } else {
        wchar_t local;          /* scratch target: only the count is wanted */

        for (;; res++, t += m) {
            m = (ssize_t) utf8toucs(&local, t);
            if (m < 0)
                Rf_error("invalid input '%s' in 'utf8towcs'", s);
            if (m == 0)
                break;
        }
    }
    return (int) res;
}
/*
 * .Internal implementation behind nchar(x, type, allowNA).
 *
 *   CAR(args)    object to measure; factors are rejected, everything else
 *                is coerced to a character vector
 *   CADR(args)   'type': a length-1 string matched as a prefix of
 *                "bytes", "chars" or "width"
 *   CADDR(args)  'allowNA': logical; NA is treated as FALSE
 *
 * Returns an integer vector of the same length as x, carrying over the
 * names, dim and dimnames attributes of x.  NA_STRING elements yield 2.
 * Elements whose size cannot be determined give NA_INTEGER when allowNA is
 * true and raise an error otherwise.
 *
 * Fix relative to the previous revision: in the "width" / multibyte-locale
 * branch the allowNA test was inverted (it raised an error when NA was
 * allowed and silently returned NA when it was not).
 */
SEXP attribute_hidden do_nchar(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP d, s, x, stype;
    int i, len, allowNA;
    size_t ntype;
    int nc;
    const char *type;
    const char *xi;
    wchar_t *wc;
    const void *vmax;

    checkArity(op, args);
    if (isFactor(CAR(args)))
        error(_("'%s' requires a character vector"), "nchar()");
    PROTECT(x = coerceVector(CAR(args), STRSXP));
    if (!isString(x))
        error(_("'%s' requires a character vector"), "nchar()");
    len = LENGTH(x);

    stype = CADR(args);
    if (!isString(stype) || LENGTH(stype) != 1)
        error(_("invalid '%s' argument"), "type");
    type = CHAR(STRING_ELT(stype, 0)); /* always ASCII */
    ntype = strlen(type);
    if (ntype == 0)
        error(_("invalid '%s' argument"), "type");

    /* The requested type does not change per element, so match it once.
       An unrecognised type is still only reported from inside the loop,
       i.e. only when a non-NA element is actually processed. */
    const int want_bytes = (strncmp(type, "bytes", ntype) == 0);
    const int want_chars = (strncmp(type, "chars", ntype) == 0);
    const int want_width = (strncmp(type, "width", ntype) == 0);

    allowNA = asLogical(CADDR(args));
    if (allowNA == NA_LOGICAL)
        allowNA = 0;

    PROTECT(s = allocVector(INTSXP, len));
    vmax = vmaxget();
    for (i = 0; i < len; i++) {
        SEXP sxi = STRING_ELT(x, i);

        if (sxi == NA_STRING) {
            INTEGER(s)[i] = 2;
            continue;
        }

        if (want_bytes) {
            INTEGER(s)[i] = LENGTH(sxi);
        } else if (want_chars) {
            if (IS_UTF8(sxi)) { /* assume this is valid */
                const char *p = CHAR(sxi);
                nc = 0;
                for ( ; *p; p += utf8clen(*p))
                    nc++;
                INTEGER(s)[i] = nc;
            } else if (IS_BYTES(sxi)) {
                if (!allowNA) /* could do chars 0 */
                    error(_("number of characters is not computable for element %d in \"bytes\" encoding"), i+1);
                INTEGER(s)[i] = NA_INTEGER;
            } else if (mbcslocale) {
                nc = (int) mbstowcs(NULL, translateChar(sxi), 0);
                if (!allowNA && nc < 0)
                    error(_("invalid multibyte string %d"), i+1);
                INTEGER(s)[i] = nc >= 0 ? nc : NA_INTEGER;
            } else
                INTEGER(s)[i] = (int) strlen(translateChar(sxi));
        } else if (want_width) {
            if (IS_UTF8(sxi)) { /* assume this is valid */
                const char *p = CHAR(sxi);
                wchar_t wc1;
                nc = 0;
                for ( ; *p; p += utf8clen(*p)) {
                    utf8toucs(&wc1, p);
                    nc += Ri18n_wcwidth(wc1);
                }
                INTEGER(s)[i] = nc;
            } else if (IS_BYTES(sxi)) {
                if (!allowNA) /* could do width 0 */
                    error(_("width is not computable for element %d in \"bytes\" encoding"), i+1);
                INTEGER(s)[i] = NA_INTEGER;
            } else if (mbcslocale) {
                xi = translateChar(sxi);
                nc = (int) mbstowcs(NULL, xi, 0);
                if (nc >= 0) {
                    wc = (wchar_t *)
                        R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);
                    mbstowcs(wc, xi, nc + 1);
                    INTEGER(s)[i] = Ri18n_wcswidth(wc, 2147483647);
                    /* fall back to the character count when no positive
                       width is reported */
                    if (INTEGER(s)[i] < 1)
                        INTEGER(s)[i] = nc;
                } else if (!allowNA)    /* was 'allowNA': inverted */
                    error(_("invalid multibyte string %d"), i+1);
                else
                    INTEGER(s)[i] = NA_INTEGER;
            } else
                INTEGER(s)[i] = (int) strlen(translateChar(sxi));
        } else
            error(_("invalid '%s' argument"), "type");

        vmaxset(vmax);
    }
    R_FreeStringBufferL(&cbuff);

    if ((d = getAttrib(x, R_NamesSymbol)) != R_NilValue)
        setAttrib(s, R_NamesSymbol, d);
    if ((d = getAttrib(x, R_DimSymbol)) != R_NilValue)
        setAttrib(s, R_DimSymbol, d);
    if ((d = getAttrib(x, R_DimNamesSymbol)) != R_NilValue)
        setAttrib(s, R_DimNamesSymbol, d);
    UNPROTECT(2);
    return s;
}
/*
 * Size of a single CHARSXP.
 *
 *   string    the CHARSXP to measure
 *   type_     Bytes, Chars or Width
 *   allowNA   when true, return NA_INTEGER for strings whose size cannot be
 *             determined; when false, raise an error instead
 *   keepNA    for NA_STRING: return NA_INTEGER if true, 2 otherwise
 *   msg_name  text identifying the element, inserted into error messages
 *
 * Returns the requested size, or NA_INTEGER as described above (also for a
 * type_ outside the three handled values).
 *
 * Fix relative to the previous revision: in the Width / multibyte-locale
 * branch the allowNA test was inverted, so an invalid multibyte string
 * raised an error exactly when the caller had asked for NA.
 */
int R_nchar(SEXP string, nchar_type type_,
            Rboolean allowNA, Rboolean keepNA, const char* msg_name)
{
    if (string == NA_STRING)
        return keepNA ? NA_INTEGER : 2;

    switch (type_) {
    case Bytes:
        return LENGTH(string);

    case Chars:
        if (IS_UTF8(string)) {
            const char *p = CHAR(string);
            if (!utf8Valid(p)) {
                if (!allowNA)
                    error(_("invalid multibyte string, %s"), msg_name);
                return NA_INTEGER;
            } else {
                int nc = 0;
                for ( ; *p; p += utf8clen(*p))
                    nc++;
                return nc;
            }
        } else if (IS_BYTES(string)) {
            if (!allowNA) /* could do chars 0 */
                error(_("number of characters is not computable in \"bytes\" encoding, %s"),
                      msg_name);
            return NA_INTEGER;
        } else if (mbcslocale) {
            int nc = (int) mbstowcs(NULL, translateChar(string), 0);
            if (!allowNA && nc < 0)
                error(_("invalid multibyte string, %s"), msg_name);
            return (nc >= 0 ? nc : NA_INTEGER);
        } else
            return ((int) strlen(translateChar(string)));

    case Width:
        if (IS_UTF8(string)) {
            const char *p = CHAR(string);
            if (!utf8Valid(p)) {
                if (!allowNA)
                    error(_("invalid multibyte string, %s"), msg_name);
                return NA_INTEGER;
            } else {
                wchar_t wc1;
                int nc = 0;
                for ( ; *p; p += utf8clen(*p)) {
                    utf8toucs(&wc1, p);
                    nc += Ri18n_wcwidth(wc1);
                }
                return nc;
            }
        } else if (IS_BYTES(string)) {
            if (!allowNA) /* could do width 0 */
                error(_("width is not computable for %s in \"bytes\" encoding"),
                      msg_name);
            return NA_INTEGER;
        } else if (mbcslocale) {
            const char *xi = translateChar(string);
            int nc = (int) mbstowcs(NULL, xi, 0);
            if (nc >= 0) {
                const void *vmax = vmaxget();
                wchar_t *wc = (wchar_t *)
                    R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);
                mbstowcs(wc, xi, nc + 1);
                int nci18n = Ri18n_wcswidth(wc, 2147483647);
                vmaxset(vmax);
                /* fall back to the character count when no positive width
                   is reported */
                return (nci18n < 1) ? nc : nci18n;
            } else if (!allowNA)        /* was 'allowNA': inverted */
                error(_("invalid multibyte string, %s"), msg_name);
            else
                return NA_INTEGER;
        } else
            return (int) strlen(translateChar(string));

    } // switch
    return NA_INTEGER; // -Wall
} // R_nchar()
int main(int argc, char *argv[]) { int argNo, ucs16_opt, utf8_opt, j; int result, remaining, encoded, decoded; if (argc < 2) { printf("Usage: %s (-ucs16 <unicode-hex-string>|-utf8 <utf8-hex-string>|<ascii-chars>)\n", argv[0]); exit(1); } ucs16_opt = 0; utf8_opt = 0; argNo = 1; if (argc > 1) { argNo = 2; if (strcmp(argv[1],"-ucs16") == 0) { ucs16_opt = 1; } else if (strcmp(argv[1],"-utf8") == 0) { utf8_opt = 1; } } if (ucs16_opt) { char* tems = argv[argNo]; int nus = strlen(argv[argNo])/4; int ns = 3*nus; UNICHAR* us = (UNICHAR*) malloc((nus+1)*sizeof(UNICHAR)); unsigned char* s = (unsigned char*) malloc(ns+1); char temus[5] = {0,0,0,0,0}; for (j=0; j<nus; j++) { temus[0] = tems[4*j]; temus[1] = tems[4*j+1]; temus[2] = tems[4*j+2]; temus[3] = tems[4*j+3]; sscanf(temus, "%x", &us[j]); } us[nus] = U_NUL; ucsprint(stderr, us, nus); fprintf(stderr, "\n"); ucstoutf8(us, nus, s, ns, &remaining, &encoded); assert(encoded <= ns); ns = encoded; s[ns] = '\0'; printf("UTF8(%d)=0x", ns); for (j=0; j<ns; j++) { printf("%02x",s[j]); } printf(", remaining=%d\n", remaining); result = utf8toucs(s, ns, us, nus, 0, &remaining, &decoded); assert(decoded <= nus); us[decoded] = U_NUL; printf("UCS(%d)=0x", decoded); for (j=0; j<decoded; j++) { printf("%04x",us[j]); } printf(", remaining=%d, result=%d\n", remaining, result); free(us); free(s); } else { char* tems = argv[argNo]; int ns, nus; char* s; UNICHAR* us; ns = (utf8_opt) ? 
strlen(argv[argNo])/2 : strlen(argv[argNo]); s = (char*) malloc(ns+1); nus = ns; us = (UNICHAR*) malloc((nus+1)*sizeof(UNICHAR)); if (utf8_opt) { char temstr[3] = {0,0,0}; int ival; for (j=0; j<ns; j++) { temstr[0] = tems[2*j]; temstr[1] = tems[2*j+1]; sscanf(temstr, "%x", &ival); s[j] = (unsigned char) ival; } } else { for (j=0; j<ns; j++) s[j] = tems[j]; } s[ns] = '\0'; result = utf8toucs(s, ns, us, nus, 0, &remaining, &decoded); assert(decoded <= nus); nus = decoded; us[nus] = U_NUL; printf("UCS(%d)=0x", nus); for (j=0; j<nus; j++) { printf("%04x",us[j]); } printf(", remaining=%d, result=%d\n", remaining, result); ucstoutf8(us, nus, s, ns, &remaining, &encoded); assert(encoded <= ns); s[encoded] = '\0'; if (utf8_opt) { printf("UTF8(%d)=0x", encoded); for (j=0; j<encoded; j++) printf("%02x", s[j]); printf(", remaining=%d\n", remaining); } else { printf("UTF8(%d)='%s', remaining=%d\n", encoded, s, remaining); } free(us); free(s); } return 0; }
/*
 * Format the CHARSXP 's' for printing: escape non-printable characters,
 * optionally surround with the 'quote' character, and pad to field width
 * 'w' according to 'justify'.
 *
 *   s        string to encode; NA_STRING prints as R_print.na_string
 *            (quoting requested) or R_print.na_string_noquote, unquoted
 *   w        field width
 *   quote    0 for no quoting, otherwise the quote character to emit and
 *            to backslash-escape inside the text
 *   justify  Rprt_adj_left / _right / _centre / _none (none => no padding)
 *
 * Returns a pointer into a static buffer that is overwritten by the next
 * call; as the original comment below says, this is not thread-safe.
 *
 * Changes relative to the previous revision:
 *  - "bytes" branch: the display width and the byte length of the escaped
 *    copy are now tracked separately.  Previously one counter served both
 *    and was incremented for every '"' when quoting, so the output loop ran
 *    past the terminating nul of the escaped copy.
 *  - the output buffer is sized for padding *plus* encoded text.  It used to
 *    be max(5*cnt+8, w), which is too small when the text occupies more
 *    bytes than display columns (multibyte characters) and w is large.
 */
const char *EncodeString(SEXP s, int w, int quote, Rprt_adj justify)
{
    int b, b0, i, j, cnt;
    const char *p;
    char *q, buf[11];
    cetype_t ienc = CE_NATIVE;

    /* We have to do something like this as the result is returned, and
       passed on by EncodeElement -- so no way could be end user be
       responsible for freeing it.  However, this is not thread-safe. */
    static R_StringBuffer gBuffer = {NULL, 0, BUFSIZE};
    R_StringBuffer *buffer = &gBuffer;

    /* From here on: p = bytes to encode, cnt = number of bytes at p,
       i = display width of the encoded text (used for padding only). */
    if (s == NA_STRING) {
        p = quote ? CHAR(R_print.na_string) : CHAR(R_print.na_string_noquote);
        cnt = i = (int)(quote ? strlen(CHAR(R_print.na_string)) :
                        strlen(CHAR(R_print.na_string_noquote)));
        quote = 0;
    } else {
#ifdef Win32
        if(WinUTF8out) {
            ienc = getCharCE(s);
            if(ienc == CE_UTF8) {
                p = CHAR(s);
                i = Rstrlen(s, quote);
                cnt = LENGTH(s);
            } else {
                p = translateChar0(s);
                if(p == CHAR(s)) {
                    i = Rstrlen(s, quote);
                    cnt = LENGTH(s);
                } else {
                    cnt = strlen(p);
                    i = Rstrwid(p, cnt, CE_NATIVE, quote);
                }
                ienc = CE_NATIVE;
            }
        } else
#endif
        {
            if(IS_BYTES(s)) {
                /* Build an escaped copy in which every byte outside
                   0x20..0x7f is written as \xNN. */
                const char *src;
                char *pp, *qq, hex[5];
                int width;

                p = CHAR(s);
                cnt = (int) strlen(p);
                width = cnt;
                pp = R_alloc(4*cnt+1, 1);
                qq = pp;
                for (src = p; *src; src++) {
                    unsigned char k = (unsigned char) *src;
                    if (k >= 0x20 && k < 0x80) {
                        *qq++ = *src;
                        if (quote && *src == '"') width++;
                    } else {
                        snprintf(hex, 5, "\\x%02x", k);
                        for(j = 0; j < 4; j++) *qq++ = hex[j];
                        width += 3;
                    }
                }
                *qq = '\0';
                p = pp;
                i = width;              /* width estimate, as before */
                cnt = (int)(qq - pp);   /* bytes actually present at p */
            } else {
                p = translateChar(s);
                if(p == CHAR(s)) {
                    i = Rstrlen(s, quote);
                    cnt = LENGTH(s);
                } else {
                    cnt = (int) strlen(p);
                    i = Rstrwid(p, cnt, CE_NATIVE, quote);
                }
            }
        }
    }

    /* We need enough space for the encoded string, including escapes.
       Octal encoding turns one byte into four.
       \u encoding can turn a multibyte into six or ten,
       but it turns 2/3 into 6, and 4 (and perhaps 5/6) into 10.
       Let's be wasteful here (the worst case appears to be an MBCS with
       one byte for an upper-plane Unicode point output as ten bytes,
       but I doubt that such an MBCS exists: two bytes is plausible).
       +2 allows for quotes, +6 for UTF_8 escapes.
       Padding (at most w) is written in addition to the encoded text, so
       the two are summed.
     */
    q = R_AllocStringBuffer(5*cnt + 8 + imax2(w, 0), buffer);

    b = w - i - (quote ? 2 : 0); /* total amount of padding */
    if(justify == Rprt_adj_none) b = 0;
    if(b > 0 && justify != Rprt_adj_left) {
        /* leading share of the padding: half when centring, all otherwise */
        b0 = (justify == Rprt_adj_centre) ? b/2 : b;
        for(i = 0 ; i < b0 ; i++) *q++ = ' ';
        b -= b0;
    }
    if(quote) *q++ = (char) quote;

    if(mbcslocale || ienc == CE_UTF8) {
        int res;
        mbstate_t mb_st;
        wchar_t wc;
        unsigned int k; /* not wint_t as it might be signed */
#ifndef __STDC_ISO_10646__
        Rboolean Unicode_warning = FALSE;
#endif
        if(ienc != CE_UTF8)  mbs_init(&mb_st);
#ifdef Win32
        else if(WinUTF8out) { memcpy(q, UTF8in, 3); q += 3; }
#endif
        /* i counts input bytes; multibyte characters advance it by 'res' */
        for (i = 0; i < cnt; i++) {
            res = (int)((ienc == CE_UTF8) ? utf8toucs(&wc, p):
                        mbrtowc(&wc, p, MB_CUR_MAX, NULL));
            if(res >= 0) { /* res = 0 is a terminator */
                k = wc;
                /* To be portable, treat \0 explicitly */
                if(res == 0) {k = 0; wc = L'\0';}
                if(0x20 <= k && k < 0x7f && iswprint(wc)) {
                    switch(wc) {
                    case L'\\': *q++ = '\\'; *q++ = '\\'; p++; break;
                    case L'\'':
                    case L'"':
                        if(quote == *p) *q++ = '\\';
                        *q++ = *p++;
                        break;
                    default:
                        for(j = 0; j < res; j++) *q++ = *p++;
                        break;
                    }
                } else if (k < 0x80) {
                    /* ANSI Escapes */
                    switch(wc) {
                    case L'\a': *q++ = '\\'; *q++ = 'a'; break;
                    case L'\b': *q++ = '\\'; *q++ = 'b'; break;
                    case L'\f': *q++ = '\\'; *q++ = 'f'; break;
                    case L'\n': *q++ = '\\'; *q++ = 'n'; break;
                    case L'\r': *q++ = '\\'; *q++ = 'r'; break;
                    case L'\t': *q++ = '\\'; *q++ = 't'; break;
                    case L'\v': *q++ = '\\'; *q++ = 'v'; break;
                    case L'\0': *q++ = '\\'; *q++ = '0'; break;

                    default:
                        /* print in octal */
                        snprintf(buf, 5, "\\%03o", k);
                        for(j = 0; j < 4; j++) *q++ = buf[j];
                        break;
                    }
                    p++;
                } else {
                    if(iswprint(wc)) {
                        /* The problem here is that wc may be
                           printable according to the Unicode tables,
                           but it may not be printable on the output
                           device concerned. */
                        for(j = 0; j < res; j++) *q++ = *p++;
                    } else {
#ifndef Win32
# ifndef __STDC_ISO_10646__
                        Unicode_warning = TRUE;
# endif
                        if(k > 0xffff)
                            snprintf(buf, 11, "\\U%08x", k);
                        else
#endif
                            snprintf(buf, 11, "\\u%04x", k);
                        j = (int) strlen(buf);
                        memcpy(q, buf, j);
                        q += j;
                        p += res;
                    }
                    i += (res - 1);
                }
            } else { /* invalid char */
                snprintf(q, 5, "\\x%02x", *((unsigned char *)p));
                q += 4; p++;
            }
        }
#ifndef __STDC_ISO_10646__
        if(Unicode_warning)
            warning(_("it is not known that wchar_t is Unicode on this platform"));
#endif

    } else
        for (i = 0; i < cnt; i++) {

            /* ASCII */
            if((unsigned char) *p < 0x80) {
                if(*p != '\t' && isprint((int)*p)) { /* Windows has \t as printable */
                    switch(*p) {
                    case '\\': *q++ = '\\'; *q++ = '\\'; break;
                    case '\'':
                    case '"':
                        if(quote == *p) *q++ = '\\';
                        *q++ = *p;
                        break;
                    default: *q++ = *p; break;
                    }
                } else switch(*p) {
                        /* ANSI Escapes */
                    case '\a': *q++ = '\\'; *q++ = 'a'; break;
                    case '\b': *q++ = '\\'; *q++ = 'b'; break;
                    case '\f': *q++ = '\\'; *q++ = 'f'; break;
                    case '\n': *q++ = '\\'; *q++ = 'n'; break;
                    case '\r': *q++ = '\\'; *q++ = 'r'; break;
                    case '\t': *q++ = '\\'; *q++ = 't'; break;
                    case '\v': *q++ = '\\'; *q++ = 'v'; break;
                    case '\0': *q++ = '\\'; *q++ = '0'; break;

                    default:
                        /* print in octal */
                        snprintf(buf, 5, "\\%03o", (unsigned char) *p);
                        for(j = 0; j < 4; j++) *q++ = buf[j];
                        break;
                    }
                p++;
            } else {  /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
                *q++ = *p++;
#else
                if(!isprint((int)*p & 0xff)) {
                    /* print in octal */
                    snprintf(buf, 5, "\\%03o", (unsigned char) *p);
                    for(j = 0; j < 4; j++) *q++ = buf[j];
                    p++;
                } else *q++ = *p++;
#endif
            }
        }

#ifdef Win32
    if(WinUTF8out && ienc == CE_UTF8)  { memcpy(q, UTF8out, 3); q += 3; }
#endif
    if(quote) *q++ = (char) quote;
    if(b > 0 && justify != Rprt_adj_right) {
        /* trailing share of the padding */
        for(i = 0 ; i < b ; i++) *q++ = ' ';
    }
    *q = '\0';
    return buffer->data;
}
/* strlen() using escaped rather than literal form,
   and allowing for embedded nuls.
   In MBCS locales it works in characters, and reports in display width.
   Also used in printarray.c.

   str    bytes to measure
   slen   number of bytes at str to examine
   ienc   CE_UTF8 to decode str as UTF-8; otherwise it is decoded with
          mbrtowc() in a multibyte locale, or byte by byte
   quote  0, or the quote character that will be backslash-escaped

   Returns the number of columns the escaped form occupies: 2 for a
   backslash, the listed C escapes and an escaped quote, 4 for an octal or
   \x escape, 6 or 10 for a \u / \U escape, otherwise the character width.

   Changes relative to the previous revision, both to agree with
   EncodeString():
    - bytes >= 0x80 are passed to isprint() as unsigned char (a negative
      'char' value is undefined behaviour there);
    - '\t' is always counted as the two-column escape, even where isprint()
      reports it as printable.
 */
attribute_hidden int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
{
    const char *p = str;
    int len = 0, i;

    if(mbcslocale || ienc == CE_UTF8) {
        int res;
        mbstate_t mb_st;
        wchar_t wc;
        unsigned int k; /* not wint_t as it might be signed */

        if(ienc != CE_UTF8)  mbs_init(&mb_st);
        /* i counts input bytes; multibyte characters advance it by 'res' */
        for (i = 0; i < slen; i++) {
            res = (ienc == CE_UTF8) ? (int) utf8toucs(&wc, p):
                (int) mbrtowc(&wc, p, MB_CUR_MAX, NULL);
            if(res >= 0) {
                k = wc;
                if(0x20 <= k && k < 0x7f && iswprint(wc)) {
                    switch(wc) {
                    case L'\\':
                        len += 2;
                        break;
                    case L'\'':
                    case L'"':
                        len += (quote == *p) ? 2 : 1;
                        break;
                    default:
                        len++; /* assumes these are all width 1 */
                        break;
                    }
                    p++;
                } else if (k < 0x80) {
                    switch(wc) {
                    case L'\a':
                    case L'\b':
                    case L'\f':
                    case L'\n':
                    case L'\r':
                    case L'\t':
                    case L'\v':
                    case L'\0':
                        len += 2; break;
                    default:
                        /* print in octal */
                        len += 4; break;
                    }
                    p++;
                } else {
                    len += iswprint((wint_t)wc) ? Ri18n_wcwidth(wc) :
#ifdef Win32
                        6;
#else
                        (k > 0xffff ? 10 : 6);
#endif
                    i += (res - 1);
                    p += res;
                }
            } else {
                /* undecodable byte: counted as a four-column escape */
                len += 4;
                p++;
            }
        }
    } else
        for (i = 0; i < slen; i++) {
            /* ASCII */
            if((unsigned char) *p < 0x80) {
                if(*p != '\t' && isprint((int)*p)) {
                    switch(*p) {
                    case '\\':
                        len += 2; break;
                    case '\'':
                    case '"':
                        len += (quote == *p)? 2 : 1; break;
                    default:
                        len++; break;
                    }
                } else switch(*p) {
                    case '\a':
                    case '\b':
                    case '\f':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\v':
                    case '\0':
                        len += 2; break;
                    default:
                        /* print in octal */
                        len += 4; break;
                    }
                p++;
            } else { /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
                len++;
#else
                len += isprint((unsigned char) *p) ? 1 : 4;
#endif
                p++;
            }
        }

    return len;
}
/* strlen() using escaped rather than literal form.
   In MBCS locales it works in characters, and reports in display width.
   Rstrwid is also used in printarray.c.

   This supported embedded nuls when we had those.

   str    bytes to measure
   slen   number of bytes at str to examine
   ienc   CE_BYTES: every byte outside 0x20..0x7f counts as a four-column
          escape; CE_UTF8: decode as UTF-8; otherwise decode with mbrtowc()
          in a multibyte locale, or byte by byte.  Values above 2 trigger a
          warning.
   quote  0, or the quote character that will be backslash-escaped

   Returns the number of columns the escaped form occupies.

   Changes relative to the previous revision, both to agree with
   EncodeString():
    - bytes >= 0x80 are passed to isprint() as unsigned char (a negative
      'char' value is undefined behaviour there);
    - '\t' is always counted as the two-column escape, even where isprint()
      reports it as printable.
 */
attribute_hidden int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
{
    const char *p = str;
    int len = 0, i;

    if(ienc == CE_BYTES) { // not currently used for that encoding
        for (i = 0; i < slen; i++) {
            unsigned char k = str[i];
            if (k >= 0x20 && k < 0x80) len += 1;
            else len += 4;
        }
        return len;
    }
    /* Future-proof: currently that is all Rstrlen calls it with,
       and printarray has CE_NATIVE explicitly */
    if(ienc > 2) // CE_NATIVE, CE_UTF8, CE_BYTES are supported
        warning("unsupported encoding (%d) in Rstrwid", ienc);

    if(mbcslocale || ienc == CE_UTF8) {
        int res;
        mbstate_t mb_st;
        wchar_t wc;
        unsigned int k; /* not wint_t as it might be signed */

        if(ienc != CE_UTF8)  mbs_init(&mb_st);
        /* i counts input bytes; multibyte characters advance it by 'res' */
        for (i = 0; i < slen; i++) {
            res = (ienc == CE_UTF8) ? (int) utf8toucs(&wc, p):
                (int) mbrtowc(&wc, p, MB_CUR_MAX, NULL);
            if(res >= 0) {
                k = wc;
                if(0x20 <= k && k < 0x7f && iswprint(wc)) {
                    switch(wc) {
                    case L'\\':
                        len += 2;
                        break;
                    case L'\'':
                    case L'"':
                    case L'`':
                        len += (quote == *p) ? 2 : 1;
                        break;
                    default:
                        len++; /* assumes these are all width 1 */
                        break;
                    }
                    p++;
                } else if (k < 0x80) {
                    switch(wc) {
                    case L'\a':
                    case L'\b':
                    case L'\f':
                    case L'\n':
                    case L'\r':
                    case L'\t':
                    case L'\v':
                    case L'\0':
                        len += 2; break;
                    default:
                        /* print in octal */
                        len += 4; break;
                    }
                    p++;
                } else {
                    len += iswprint((wint_t)wc) ? Ri18n_wcwidth(wc) :
#ifdef Win32
                        6;
#else
                        (k > 0xffff ? 10 : 6);
#endif
                    i += (res - 1);
                    p += res;
                }
            } else {
                /* undecodable byte: counted as a four-column escape */
                len += 4;
                p++;
            }
        }
    } else // not MBCS nor marked as UTF-8
        for (i = 0; i < slen; i++) {
            if((unsigned char) *p < 0x80) {
                /* ASCII */
                if(*p != '\t' && isprint((int)*p)) {
                    switch(*p) {
                    case '\\':
                        len += 2; break;
                    case '\'':
                    case '"':
                    case '`':
                        len += (quote == *p)? 2 : 1; break;
                    default:
                        len++; break;
                    }
                } else switch(*p) {
                    case '\a':
                    case '\b':
                    case '\f':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\v':
                    case '\0':
                        len += 2; break;
                    default:
                        /* print in octal */
                        len += 4; break;
                    }
                p++;
            } else { /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
                len++;
#else
                len += isprint((unsigned char) *p) ? 1 : 4;
#endif
                p++;
            }
        }

    return len;
}
/*
 * Install a symbol whose name is the text of CHARSXP 'x' converted to the
 * native encoding.
 *
 * The text is installed unchanged when x is NA_STRING, has no known
 * encoding, is ASCII, or is already in the locale's encoding (UTF-8 text in
 * a UTF-8 locale, latin1 text in a latin1 locale).  Otherwise it is
 * converted with Riconv from latin1 or UTF-8; input that cannot be
 * converted is replaced by <U+XXXX> (<U+XXXXXXXX> above the BMP, except on
 * Win32) for UTF-8 sources, or by <xx> for a single byte.
 *
 * Raises an R error if x is not a CHARSXP, if x has "bytes" encoding, or if
 * the required converter cannot be opened.
 *
 * Changes relative to the previous revision:
 *  - CHAR(x) and getCharCE(x) are evaluated only after x has been verified
 *    to be a CHARSXP;
 *  - the error raised when the UTF-8 converter cannot be opened now names
 *    "UTF-8" instead of "latin1";
 *  - the Riconv failure value is compared as (size_t)(-1).
 */
SEXP installTrChar(SEXP x)
{
    void * obj;
    const char *inbuf, *ans;
    char *outbuf;
    size_t inb, outb, res;
    cetype_t ienc;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    if(TYPEOF(x) != CHARSXP)
        error(_("'%s' must be called on a CHARSXP"), "installTrChar");
    ans = CHAR(x);
    ienc = getCharCE(x);

    if(x == NA_STRING || !(ENC_KNOWN(x))) return install(ans);
    if(IS_BYTES(x))
        error(_("translating strings with \"bytes\" encoding is not allowed"));
    if(utf8locale && IS_UTF8(x)) return install(ans);
    if(latin1locale && IS_LATIN1(x)) return install(ans);
    if(IS_ASCII(x)) return install(ans);

    /* Converters are opened on first use and cached in latin1_obj/utf8_obj. */
    if(IS_LATIN1(x)) {
        if(!latin1_obj) {
            obj = Riconv_open("", "latin1");
            /* should never happen */
            if(obj == (void *)(-1))
#ifdef Win32
                error(_("unsupported conversion from '%s' in codepage %d"),
                      "latin1", localeCP);
#else
                error(_("unsupported conversion from '%s' to '%s'"),
                      "latin1", "");
#endif
            latin1_obj = obj;
        }
        obj = latin1_obj;
    } else {
        if(!utf8_obj) {
            obj = Riconv_open("", "UTF-8");
            /* should never happen */
            if(obj == (void *)(-1))
#ifdef Win32
                error(_("unsupported conversion from '%s' in codepage %d"),
                      "UTF-8", localeCP);
#else
                error(_("unsupported conversion from '%s' to '%s'"),
                      "UTF-8", "");
#endif
            utf8_obj = obj;
        }
        obj = utf8_obj;
    }

    R_AllocStringBuffer(0, &cbuff);
top_of_loop:
    /* (Re)start the whole conversion; reached again after the output
       buffer has been enlarged. */
    inbuf = ans; inb = strlen(inbuf);
    outbuf = cbuff.data; outb = cbuff.bufsize - 1;
    /* First initialize output */
    Riconv (obj, NULL, NULL, &outbuf, &outb);
next_char:
    /* Then convert input  */
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    if(res == (size_t)(-1) && errno == E2BIG) {
        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
        goto top_of_loop;
    } else if(res == (size_t)(-1) && (errno == EILSEQ || errno == EINVAL)) {
        /* the longest substitute is 12 characters plus snprintf's nul */
        if(outb < 13) {
            R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
            goto top_of_loop;
        }
        if (ienc == CE_UTF8) {
            /* if starting in UTF-8, use \uxxxx */
            /* This must be the first byte */
            size_t clen;
            wchar_t wc;
            clen = utf8toucs(&wc, inbuf);
            if(clen > 0 && inb >= clen) {
                inbuf += clen; inb -= clen;
# ifndef Win32
                if((unsigned int) wc < 65536) {
# endif
                    snprintf(outbuf, 9, "<U+%04X>", (unsigned int) wc);
                    outbuf += 8; outb -= 8;
# ifndef Win32
                } else {
                    snprintf(outbuf, 13, "<U+%08X>", (unsigned int) wc);
                    outbuf += 12; outb -= 12;
                }
# endif
            } else {
                snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
                outbuf += 4; outb -= 4;
                inbuf++; inb--;
            }
        } else {
            snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
            outbuf += 4; outb -= 4;
            inbuf++; inb--;
        }
        goto next_char;
    }
    *outbuf = '\0';
    SEXP Sans = install(cbuff.data);
    R_FreeStringBuffer(&cbuff);
    return Sans;
}