/*
 * Display width of a wide-character string.
 * Used in character.c, gnuwin32/console.c
 *
 * Adds up Ri18n_wcwidth() over at most n wide characters of s, stopping
 * early at the terminating L'\0'.
 *
 * Returns the accumulated width, or -1 as soon as Ri18n_wcwidth()
 * reports -1 for any character.
 */
attribute_hidden
int Ri18n_wcswidth (const wchar_t *s, size_t n)
{
    int total = 0;
    size_t remaining;

    for (remaining = n; remaining > 0 && *s != L'\0'; remaining--, s++) {
        int w = Ri18n_wcwidth(*s);

        if (w == -1)
            return -1;
        total += w;
    }
    return total;
}
static void gl_del(int loc) /* * Delete a character. The loc variable can be: * -1 : delete character to left of cursor * 0 : delete character under cursor */ { int i; if(mbcslocale) { int mb_len; mbstate_t mb_st; wchar_t wc; mb_len=0; mbs_init(&mb_st); if ((loc == -1 && gl_pos > 0) || (loc == 0 && gl_pos < gl_cnt)) { for(i = 0; i<= gl_pos + loc;) { mbrtowc(&wc,gl_buf+i, MB_CUR_MAX, &mb_st); mb_len = Ri18n_wcwidth(wc); i += (wc==0) ? 0 : mb_len; } for (i = gl_pos+(loc*mb_len); i <= gl_cnt - mb_len; i++) gl_buf[i] = gl_buf[i + mb_len]; gl_fixup(gl_prompt,gl_pos+(loc * mb_len) , gl_pos+(loc * mb_len)); } else gl_beep(); } else if ((loc == -1 && gl_pos > 0) || (loc == 0 && gl_pos < gl_cnt)) { for (i = gl_pos+loc; i < gl_cnt; i++) gl_buf[i] = gl_buf[i+1]; gl_fixup(gl_prompt, gl_pos+loc, gl_pos+loc); } else gl_beep(); }
/*
 * Size of a single string element, measured according to type_:
 *   Bytes : LENGTH(string)
 *   Chars : number of characters
 *   Width : number of display columns (via Ri18n_wcwidth / Ri18n_wcswidth)
 *
 * string    the element to measure; NA_STRING yields NA_INTEGER when keepNA
 *           is true and 2 otherwise.
 * allowNA   when false, a string whose size cannot be computed (invalid
 *           multibyte data, or "bytes" encoding for Chars/Width) raises an
 *           error; when true, NA_INTEGER is returned instead.
 * msg_name  text spliced into the error messages to identify the element.
 *
 * Returns the size as an int, or NA_INTEGER as described above.
 */
int R_nchar(SEXP string, nchar_type type_, Rboolean allowNA, Rboolean keepNA, const char* msg_name)
{
    if (string == NA_STRING)
        return keepNA ? NA_INTEGER : 2;
    // else :
    switch(type_) {
    case Bytes:
        return LENGTH(string);
        break;
    case Chars:
        if (IS_UTF8(string)) {
            const char *p = CHAR(string);
            if (!utf8Valid(p)) {
                if (!allowNA)
                    error(_("invalid multibyte string, %s"), msg_name);
                return NA_INTEGER;
            } else {
                int nc = 0;
                /* one step per UTF-8 sequence */
                for( ; *p; p += utf8clen(*p)) nc++;
                return nc;
            }
        } else if (IS_BYTES(string)) {
            if (!allowNA) /* could do chars 0 */
                error(_("number of characters is not computable in \"bytes\" encoding, %s"),
                      msg_name);
            return NA_INTEGER;
        } else if (mbcslocale) {
            /* mbstowcs(NULL, ., 0) only counts; a negative value after the
               cast means the conversion failed */
            int nc = (int) mbstowcs(NULL, translateChar(string), 0);
            if (!allowNA && nc < 0)
                error(_("invalid multibyte string, %s"), msg_name);
            return (nc >= 0 ? nc : NA_INTEGER);
        } else
            return ((int) strlen(translateChar(string)));
        break;
    case Width:
        if (IS_UTF8(string)) {
            const char *p = CHAR(string);
            if (!utf8Valid(p)) {
                if (!allowNA)
                    error(_("invalid multibyte string, %s"), msg_name);
                return NA_INTEGER;
            } else {
                wchar_t wc1;
                int nc = 0;
                for( ; *p; p += utf8clen(*p)) {
                    utf8toucs(&wc1, p);
                    nc += Ri18n_wcwidth(wc1);
                }
                return nc;
            }
        } else if (IS_BYTES(string)) {
            if (!allowNA) /* could do width 0 */
                error(_("width is not computable for %s in \"bytes\" encoding"),
                      msg_name);
            return NA_INTEGER;
        } else if (mbcslocale) {
            const char *xi = translateChar(string);
            int nc = (int) mbstowcs(NULL, xi, 0);
            if (nc >= 0) {
                const void *vmax = vmaxget();
                wchar_t *wc = (wchar_t *)
                    R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);

                mbstowcs(wc, xi, nc + 1);
                int nci18n = Ri18n_wcswidth(wc, 2147483647);
                vmaxset(vmax);
                /* fall back to the character count when no positive width
                   could be computed */
                return (nci18n < 1) ? nc : nci18n;
            }
            /* Invalid multibyte string: same policy as every other branch,
               i.e. error unless the caller allows NA.  (The test used to be
               inverted here.) */
            if (!allowNA)
                error(_("invalid multibyte string, %s"), msg_name);
            return NA_INTEGER;
        } else
            return (int) strlen(translateChar(string));
    } // switch
    return NA_INTEGER; // -Wall
} // R_nchar()
/*
 * Implementation of nchar(x, type, allowNA).
 *
 * args: x (coerced to a character vector; factors are rejected), type
 * (a length-one string, matched as a prefix of "bytes", "chars" or
 * "width") and allowNA (logical; NA is treated as FALSE).
 *
 * Returns an integer vector of the same length as x, carrying over the
 * names, dim and dimnames attributes of x.  NA elements give 2.  An
 * element whose size cannot be computed gives NA_INTEGER when allowNA is
 * true and raises an error otherwise.
 */
SEXP attribute_hidden do_nchar(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP d, s, x, stype;
    int i, len, allowNA;
    size_t ntype;
    int nc;
    int want_bytes, want_chars, want_width;
    const char *type;
    const char *xi;
    wchar_t *wc;
    const void *vmax;

    checkArity(op, args);
    if (isFactor(CAR(args)))
        error(_("'%s' requires a character vector"), "nchar()");
    PROTECT(x = coerceVector(CAR(args), STRSXP));
    if (!isString(x))
        error(_("'%s' requires a character vector"), "nchar()");
    len = LENGTH(x);
    stype = CADR(args);
    if (!isString(stype) || LENGTH(stype) != 1)
        error(_("invalid '%s' argument"), "type");
    type = CHAR(STRING_ELT(stype, 0)); /* always ASCII */
    ntype = strlen(type);
    if (ntype == 0)
        error(_("invalid '%s' argument"), "type");
    allowNA = asLogical(CADDR(args));
    if (allowNA == NA_LOGICAL) allowNA = 0;

    /* The requested type does not change between elements: match it once.
       An unmatched type is still only reported when a non-NA element is
       reached, exactly as before. */
    want_bytes = (strncmp(type, "bytes", ntype) == 0);
    want_chars = !want_bytes && (strncmp(type, "chars", ntype) == 0);
    want_width = !want_bytes && !want_chars &&
        (strncmp(type, "width", ntype) == 0);

    PROTECT(s = allocVector(INTSXP, len));
    vmax = vmaxget();
    for (i = 0; i < len; i++) {
        SEXP sxi = STRING_ELT(x, i);
        if (sxi == NA_STRING) {
            INTEGER(s)[i] = 2;
            continue;
        }
        if (want_bytes) {
            INTEGER(s)[i] = LENGTH(sxi);
        } else if (want_chars) {
            if (IS_UTF8(sxi)) { /* assume this is valid */
                const char *p = CHAR(sxi);
                nc = 0;
                for( ; *p; p += utf8clen(*p)) nc++;
                INTEGER(s)[i] = nc;
            } else if (IS_BYTES(sxi)) {
                if (!allowNA) /* could do chars 0 */
                    error(_("number of characters is not computable for element %d in \"bytes\" encoding"), i+1);
                INTEGER(s)[i] = NA_INTEGER;
            } else if (mbcslocale) {
                /* count only; negative after the cast means failure */
                nc = (int) mbstowcs(NULL, translateChar(sxi), 0);
                if (!allowNA && nc < 0)
                    error(_("invalid multibyte string %d"), i+1);
                INTEGER(s)[i] = nc >= 0 ? nc : NA_INTEGER;
            } else
                INTEGER(s)[i] = (int) strlen(translateChar(sxi));
        } else if (want_width) {
            if (IS_UTF8(sxi)) { /* assume this is valid */
                const char *p = CHAR(sxi);
                wchar_t wc1;
                nc = 0;
                for( ; *p; p += utf8clen(*p)) {
                    utf8toucs(&wc1, p);
                    nc += Ri18n_wcwidth(wc1);
                }
                INTEGER(s)[i] = nc;
            } else if (IS_BYTES(sxi)) {
                if (!allowNA) /* could do width 0 */
                    error(_("width is not computable for element %d in \"bytes\" encoding"), i+1);
                INTEGER(s)[i] = NA_INTEGER;
            } else if (mbcslocale) {
                xi = translateChar(sxi);
                nc = (int) mbstowcs(NULL, xi, 0);
                if (nc >= 0) {
                    wc = (wchar_t *)
                        R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);
                    mbstowcs(wc, xi, nc + 1);
                    INTEGER(s)[i] = Ri18n_wcswidth(wc, 2147483647);
                    /* no positive width available: use the char count */
                    if (INTEGER(s)[i] < 1) INTEGER(s)[i] = nc;
                } else if (!allowNA)
                    /* same policy as the "chars" branch; the test used to
                       be inverted here */
                    error(_("invalid multibyte string %d"), i+1);
                else
                    INTEGER(s)[i] = NA_INTEGER;
            } else
                INTEGER(s)[i] = (int) strlen(translateChar(sxi));
        } else
            error(_("invalid '%s' argument"), "type");
        vmaxset(vmax);
    }
    R_FreeStringBufferL(&cbuff);
    if ((d = getAttrib(x, R_NamesSymbol)) != R_NilValue)
        setAttrib(s, R_NamesSymbol, d);
    if ((d = getAttrib(x, R_DimSymbol)) != R_NilValue)
        setAttrib(s, R_DimSymbol, d);
    if ((d = getAttrib(x, R_DimNamesSymbol)) != R_NilValue)
        setAttrib(s, R_DimNamesSymbol, d);
    UNPROTECT(2);
    return s;
}
/*
 * strlen() using escaped rather than literal form, and allowing for
 * embedded nuls.  In MBCS locales it works in characters, and reports
 * in display width.  Also used in printarray.c.
 *
 * str    bytes to measure (slen of them; need not stop at a nul)
 * slen   number of bytes of str to examine
 * ienc   CE_UTF8 selects UTF-8 decoding; otherwise the locale decides
 * quote  quote character in use (0 for none); a matching quote in the
 *        string counts as 2 columns
 *
 * Per-character cost: backslash 2; the C escapes \a \b \f \n \r \t \v \0
 * 2; any other non-printable ASCII 4 (octal form); printable non-ASCII
 * its Ri18n_wcwidth(); non-printable non-ASCII 6 or 10; a byte that does
 * not decode 4.
 */
attribute_hidden
int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
{
    const char *p = str;
    int len = 0, i;

    if(mbcslocale || ienc == CE_UTF8) {
        int res;
        mbstate_t mb_st;
        wchar_t wc;
        unsigned int k; /* not wint_t as it might be signed */

        /* NOTE(review): mb_st is initialised but mbrtowc() below is given
           NULL, so it uses its own internal state -- confirm intent. */
        if(ienc != CE_UTF8) mbs_init(&mb_st);
        for (i = 0; i < slen; i++) {
            res = (ienc == CE_UTF8) ? (int) utf8toucs(&wc, p):
                (int) mbrtowc(&wc, p, MB_CUR_MAX, NULL);
            if(res >= 0) {
                k = wc;
                if(0x20 <= k && k < 0x7f && iswprint(wc)) {
                    switch(wc) {
                    case L'\\':
                        len += 2;
                        break;
                    case L'\'':
                    case L'"':
                        len += (quote == *p) ? 2 : 1;
                        break;
                    default:
                        len++; /* assumes these are all width 1 */
                        break;
                    }
                    p++;
                } else if (k < 0x80) {
                    switch(wc) {
                    case L'\a':
                    case L'\b':
                    case L'\f':
                    case L'\n':
                    case L'\r':
                    case L'\t':
                    case L'\v':
                    case L'\0':
                        len += 2;
                        break;
                    default:
                        /* print in octal */
                        len += 4;
                        break;
                    }
                    p++;
                } else {
                    /* NOTE(review): 6 / 10 presumably match the length of
                       the Unicode escape that gets printed -- confirm. */
                    len += iswprint((wint_t)wc) ? Ri18n_wcwidth(wc) :
#ifdef Win32
                        6;
#else
                        (k > 0xffff ? 10 : 6);
#endif
                    /* the character used res bytes: skip the extra ones */
                    i += (res - 1);
                    p += res;
                }
            } else {
                len += 4;
                p++;
            }
        }
    } else
        for (i = 0; i < slen; i++) {
            /* ASCII */
            if((unsigned char) *p < 0x80) {
                if(isprint((unsigned char)*p)) {
                    switch(*p) {
                    case '\\':
                        len += 2;
                        break;
                    case '\'':
                    case '"':
                        len += (quote == *p)? 2 : 1;
                        break;
                    default:
                        len++;
                        break;
                    }
                } else switch(*p) {
                    case '\a':
                    case '\b':
                    case '\f':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\v':
                    case '\0':
                        len += 2;
                        break;
                    default:
                        /* print in octal */
                        len += 4;
                        break;
                    }
                p++;
            } else { /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
                len++;
#else
                /* <ctype.h> functions need an unsigned char value: a plain
                   char >= 0x80 may be negative, which is undefined. */
                len += isprint((unsigned char)*p) ? 1 : 4;
#endif
                p++;
            }
        }

    return len;
}
/*
 * strlen() using escaped rather than literal form.  In MBCS locales it
 * works in characters, and reports in display width.  Rstrwid is also
 * used in printarray.c.
 *
 * This supported embedded nuls when we had those.
 *
 * str    bytes to measure
 * slen   number of bytes of str to examine
 * ienc   CE_BYTES: every byte in 0x20..0x7f counts 1, any other byte 4;
 *        CE_UTF8: UTF-8 decoding; otherwise the locale decides.  Values
 *        above 2 other than CE_BYTES trigger a warning.
 * quote  quote character in use (0 for none); a matching ', " or ` in
 *        the string counts as 2 columns
 *
 * Per-character cost outside CE_BYTES: backslash 2; the C escapes
 * \a \b \f \n \r \t \v \0 2; any other non-printable ASCII 4 (octal
 * form); printable non-ASCII its Ri18n_wcwidth(); non-printable
 * non-ASCII 6 or 10; a byte that does not decode 4.
 */
attribute_hidden
int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
{
    const char *p = str;
    int len = 0, i;

    if(ienc == CE_BYTES) { // not currently used for that encoding
        for (i = 0; i < slen; i++) {
            unsigned char k = str[i];
            if (k >= 0x20 && k < 0x80) len += 1;
            else len += 4;
        }
        return len;
    }
    /* Future-proof: currently that is all Rstrlen calls it with,
       and printarray has CE_NATIVE explicitly */
    if(ienc > 2) // CE_NATIVE, CE_UTF8, CE_BYTES are supported
        warning("unsupported encoding (%d) in Rstrwid", ienc);
    if(mbcslocale || ienc == CE_UTF8) {
        int res;
        mbstate_t mb_st;
        wchar_t wc;
        unsigned int k; /* not wint_t as it might be signed */

        /* NOTE(review): mb_st is initialised but mbrtowc() below is given
           NULL, so it uses its own internal state -- confirm intent. */
        if(ienc != CE_UTF8) mbs_init(&mb_st);
        for (i = 0; i < slen; i++) {
            res = (ienc == CE_UTF8) ? (int) utf8toucs(&wc, p):
                (int) mbrtowc(&wc, p, MB_CUR_MAX, NULL);
            if(res >= 0) {
                k = wc;
                if(0x20 <= k && k < 0x7f && iswprint(wc)) {
                    switch(wc) {
                    case L'\\':
                        len += 2;
                        break;
                    case L'\'':
                    case L'"':
                    case L'`':
                        len += (quote == *p) ? 2 : 1;
                        break;
                    default:
                        len++; /* assumes these are all width 1 */
                        break;
                    }
                    p++;
                } else if (k < 0x80) {
                    switch(wc) {
                    case L'\a':
                    case L'\b':
                    case L'\f':
                    case L'\n':
                    case L'\r':
                    case L'\t':
                    case L'\v':
                    case L'\0':
                        len += 2;
                        break;
                    default:
                        /* print in octal */
                        len += 4;
                        break;
                    }
                    p++;
                } else {
                    /* NOTE(review): 6 / 10 presumably match the length of
                       the Unicode escape that gets printed -- confirm. */
                    len += iswprint((wint_t)wc) ? Ri18n_wcwidth(wc) :
#ifdef Win32
                        6;
#else
                        (k > 0xffff ? 10 : 6);
#endif
                    /* the character used res bytes: skip the extra ones */
                    i += (res - 1);
                    p += res;
                }
            } else {
                len += 4;
                p++;
            }
        }
    } else // not MBCS nor marked as UTF-8
        for (i = 0; i < slen; i++) {
            if((unsigned char) *p < 0x80) {
                /* ASCII */
                if(isprint((unsigned char)*p)) {
                    switch(*p) {
                    case '\\':
                        len += 2;
                        break;
                    case '\'':
                    case '"':
                    case '`':
                        len += (quote == *p)? 2 : 1;
                        break;
                    default:
                        len++;
                        break;
                    }
                } else switch(*p) {
                    case '\a':
                    case '\b':
                    case '\f':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\v':
                    case '\0':
                        len += 2;
                        break;
                    default:
                        /* print in octal */
                        len += 4;
                        break;
                    }
                p++;
            } else { /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
                len++;
#else
                /* <ctype.h> functions need an unsigned char value: a plain
                   char >= 0x80 may be negative, which is undefined. */
                len += isprint((unsigned char)*p) ? 1 : 4;
#endif
                p++;
            }
        }

    return len;
}
int getline(const char *prompt, char *buf, int buflen) { int c, loc, tmp; int mb_len; mbstate_t mb_st; int i; wchar_t wc; BUF_SIZE = buflen; gl_buf = buf; gl_buf[0] = '\0'; if (setjmp(gl_jmp)) { gl_newline(); gl_cleanup(); return 0; } gl_init(); gl_pos = 0; gl_prompt = (prompt)? prompt : ""; if (gl_in_hook) gl_in_hook(gl_buf); gl_fixup(gl_prompt, -2, BUF_SIZE); while ((c = gl_getc()) >= 0) { gl_extent = 0; /* reset to full extent */ if (!iscntrl(c)) { if (gl_search_mode) search_addchar(c); else gl_addchar(c); } else { if (gl_search_mode) { if (c == '\033' || c == '\016' || c == '\020') { search_term(); c = 0; /* ignore the character */ } else if (c == '\010' || c == '\177') { search_addchar(-1); /* unwind search string */ c = 0; } else if (c != '\022' && c != '\023') { search_term(); /* terminate and handle char */ } } switch (c) { case '\n': case '\r': /* newline */ gl_newline(); gl_cleanup(); return 0; /*NOTREACHED*/ break; case '\001': gl_fixup(gl_prompt, -1, 0); /* ^A */ break; case '\002': /* ^B */ if(mbcslocale) { mb_len = 0; mbs_init(&mb_st); for(i = 0; i < gl_pos ;) { mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st); mb_len = Ri18n_wcwidth(wc); i += (wc==0) ? 0 : mb_len; } gl_fixup(gl_prompt, -1, gl_pos - mb_len); } else gl_fixup(gl_prompt, -1, gl_pos-1); break; case '\003': /* ^C */ gl_fixup(gl_prompt, -1, gl_cnt); gl_puts("^C\n"); gl_kill(0); gl_fixup(gl_prompt, -2, BUF_SIZE); break; case '\004': /* ^D */ if (gl_cnt == 0) { gl_buf[0] = 0; gl_cleanup(); gl_putc('\n'); return 0; } else { gl_del(0); } break; case '\005': gl_fixup(gl_prompt, -1, gl_cnt); /* ^E */ break; case '\006': /* ^F */ if(mbcslocale) { if(gl_pos >= gl_cnt) break; mb_len = 0; mbs_init(&mb_st); for(i = 0; i<= gl_pos ;){ mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st); mb_len = Ri18n_wcwidth(wc); i += (wc==0) ? 
0 : mb_len; } gl_fixup(gl_prompt, -1, gl_pos + mb_len); } else gl_fixup(gl_prompt, -1, gl_pos+1); break; case '\010': case '\177': gl_del(-1); /* ^H and DEL */ break; case '\t': /* TAB */ if (gl_tab_hook) { tmp = gl_pos; loc = gl_tab_hook(gl_buf, gl_strlen(gl_prompt), &tmp); if (loc != -1 || tmp != gl_pos) gl_fixup(gl_prompt, loc, tmp); } break; case '\013': gl_kill(gl_pos); /* ^K */ break; case '\014': gl_redraw(); /* ^L */ break; case '\016': /* ^N */ strncpy(gl_buf, gl_hist_next(), BUF_SIZE-2); gl_buf[BUF_SIZE-2] = '\0'; if (gl_in_hook) gl_in_hook(gl_buf); gl_fixup(gl_prompt, 0, BUF_SIZE); break; case '\017': gl_overwrite = !gl_overwrite; /* ^O */ break; case '\020': /* ^P */ strncpy(gl_buf, gl_hist_prev(),BUF_SIZE-2); gl_buf[BUF_SIZE-2] = '\0'; if (gl_in_hook) gl_in_hook(gl_buf); gl_fixup(gl_prompt, 0, BUF_SIZE); break; case '\022': search_back(1); /* ^R */ break; case '\023': search_forw(1); /* ^S */ break; case '\024': gl_transpose(); /* ^T */ break; case '\025': gl_kill(0); /* ^U */ break; case '\027': gl_killword(-1); /* ^W */ break; case '\031': gl_yank(); /* ^Y */ break; case '\032': /* ^Z */ gl_newline(); gl_cleanup(); return 1; /*NOTREACHED*/ break; case '\033': /* ansi arrow keys */ c = gl_getc(); if (c == '[') { switch(c = gl_getc()) { case 'A': /* up */ strncpy(gl_buf, gl_hist_prev(), BUF_SIZE-2); gl_buf[BUF_SIZE-2] = '\0'; if (gl_in_hook) gl_in_hook(gl_buf); gl_fixup(gl_prompt, 0, BUF_SIZE); break; case 'B': /* down */ strncpy(gl_buf, gl_hist_next(), BUF_SIZE-2); gl_buf[BUF_SIZE-2] = '\0'; if (gl_in_hook) gl_in_hook(gl_buf); gl_fixup(gl_prompt, 0, BUF_SIZE); break; case 'C': /* right */ if(mbcslocale) { mb_len = 0; mbs_init(&mb_st); for(i = 0; i <= gl_pos ;) { mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st); mb_len = Ri18n_wcwidth(wc); i += (wc==0) ? 
0 : mb_len; } gl_fixup(gl_prompt, -1, gl_pos + mb_len); } else gl_fixup(gl_prompt, -1, gl_pos+1); break; case 'D': /* left */ if(mbcslocale) { mb_len = 0; mbs_init(&mb_st); for(i = 0; i <= gl_pos ;) { mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st); mb_len = Ri18n_wcwidth(wc); i += (wc==0) ? 0 :mb_len; } gl_fixup(gl_prompt, -1, gl_pos - mb_len); } else gl_fixup(gl_prompt, -1, gl_pos-1); break; default: gl_putc('\007'); /* who knows */ break; } } else if (c == 'f' || c == 'F') { gl_word(1); } else if (c == 'b' || c == 'B') { gl_word(-1); } else gl_putc('\007'); break; default: /* check for a terminal signal */ if (c > 0) gl_putc('\007'); break; } } } gl_newline(); gl_cleanup(); return 0; }