Ejemplo n.º 1
0
/* Used in character.c, gnuwin32/console.c.
 *
 * Sum of the Ri18n_wcwidth() values of the wide characters in s.
 * Scanning stops after n characters or at the first L'\0', whichever
 * comes first.  If Ri18n_wcwidth() reports -1 for any character the
 * whole result is -1.
 */
attribute_hidden
int Ri18n_wcswidth (const wchar_t *s, size_t n)
{
    int total = 0;

    for ( ; n > 0 && *s != L'\0'; n--, s++) {
	int w = Ri18n_wcwidth (*s);
	if (w == -1)
	    return -1;
	total += w;
    }
    return total;
}
Ejemplo n.º 2
0
/*
 * Delete one character from the edit buffer.  The loc argument selects
 * which one:
 *    -1 : the character to the left of the cursor
 *     0 : the character under the cursor
 * When there is nothing to delete in the requested direction (or loc
 * has any other value) the buffer is left alone and gl_beep() is called.
 */
static void
gl_del(int loc)
{
    int k;

    if (!mbcslocale) {
	/* single-byte locale: one character is one byte */
	if (!((loc == -1 && gl_pos > 0) || (loc == 0 && gl_pos < gl_cnt))) {
	    gl_beep();
	    return;
	}
	/* shift the tail left by one; the byte at gl_cnt is copied too */
	k = gl_pos + loc;
	while (k < gl_cnt) {
	    gl_buf[k] = gl_buf[k + 1];
	    k++;
	}
	gl_fixup(gl_prompt, gl_pos + loc, gl_pos + loc);
	return;
    }

    {
	mbstate_t state;
	wchar_t ch;
	int step = 0;	/* size of the character being removed */
	int from;

	mbs_init(&state);

	if (!((loc == -1 && gl_pos > 0) || (loc == 0 && gl_pos < gl_cnt))) {
	    gl_beep();
	    return;
	}

	/* Walk from the start of the buffer so that step ends up holding
	   the size of the character that covers offset gl_pos + loc.
	   NOTE(review): the step is the display width reported by
	   Ri18n_wcwidth(), used here as a byte count -- presumably this
	   relies on encodings where the two coincide; confirm. */
	k = 0;
	while (k <= gl_pos + loc) {
	    mbrtowc(&ch, gl_buf + k, MB_CUR_MAX, &state);
	    step = Ri18n_wcwidth(ch);
	    if (ch != 0)
		k += step;
	}

	/* close the gap left by the removed character */
	from = gl_pos + (loc * step);
	for (k = from; k <= gl_cnt - step; k++)
	    gl_buf[k] = gl_buf[k + step];
	gl_fixup(gl_prompt, from, from);
    }
}
Ejemplo n.º 3
0
/*
 * Size of a single string element, measured according to type_:
 *   Bytes : LENGTH(string)
 *   Chars : number of characters
 *   Width : display width, summed from Ri18n_wcwidth()
 *
 * string    the element to measure
 * type_     which measure to compute
 * allowNA   if false, a string whose size cannot be computed (invalid
 *           multibyte sequence, or "bytes" encoding for Chars/Width)
 *           raises an error; if true NA_INTEGER is returned instead
 * keepNA    for NA_STRING: return NA_INTEGER if true, 2 otherwise
 * msg_name  text identifying the element, inserted into error messages
 *
 * Returns the size, or NA_INTEGER as described above.
 */
int R_nchar(SEXP string, nchar_type type_,
	    Rboolean allowNA, Rboolean keepNA, const char* msg_name)
{
    if (string == NA_STRING)
	return keepNA ? NA_INTEGER : 2;
    // else :
    switch(type_) {
    case Bytes:
	return LENGTH(string);
    case Chars:
	if (IS_UTF8(string)) {
	    const char *p = CHAR(string);
	    if (!utf8Valid(p)) {
		if (!allowNA)
		    error(_("invalid multibyte string, %s"), msg_name);
		return NA_INTEGER;
	    } else {
		/* step over one UTF-8 sequence per character */
		int nc = 0;
		for( ; *p; p += utf8clen(*p)) nc++;
		return nc;
	    }
	} else if (IS_BYTES(string)) {
	    if (!allowNA) /* could do chars 0 */
		error(_("number of characters is not computable in \"bytes\" encoding, %s"),
		      msg_name);
	    return NA_INTEGER;
	} else if (mbcslocale) {
	    /* mbstowcs(NULL, ...) only counts; it yields (size_t)-1,
	       i.e. a negative int, for an invalid sequence */
	    int nc = (int) mbstowcs(NULL, translateChar(string), 0);
	    if (!allowNA && nc < 0)
		error(_("invalid multibyte string, %s"), msg_name);
	    return (nc >= 0 ? nc : NA_INTEGER);
	} else
	    return ((int) strlen(translateChar(string)));
    case Width:
	if (IS_UTF8(string)) {
	    const char *p = CHAR(string);
	    if (!utf8Valid(p)) {
		if (!allowNA)
		    error(_("invalid multibyte string, %s"), msg_name);
		return NA_INTEGER;
	    } else {
		wchar_t wc1;
		int nc = 0;
		for( ; *p; p += utf8clen(*p)) {
		    utf8toucs(&wc1, p);
		    nc += Ri18n_wcwidth(wc1);
		}
		return nc;
	    }
	} else if (IS_BYTES(string)) {
	    if (!allowNA) /* could do width 0 */
		error(_("width is not computable for %s in \"bytes\" encoding"),
		      msg_name);
	    return NA_INTEGER;
	} else if (mbcslocale) {
	    const char *xi = translateChar(string);
	    int nc = (int) mbstowcs(NULL, xi, 0);
	    if (nc >= 0) {
		const void *vmax = vmaxget();
		wchar_t *wc = (wchar_t *)
		    R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);
		mbstowcs(wc, xi, nc + 1);
		int nci18n = Ri18n_wcswidth(wc, 2147483647);
		vmaxset(vmax);
		/* fall back to the character count when no positive
		   width could be computed */
		return (nci18n < 1) ? nc : nci18n;
	    } else if (!allowNA)
		/* The test used to be inverted here: it raised the error
		   when NA was allowed and returned NA when it was not,
		   unlike every other branch of this function. */
		error(_("invalid multibyte string, %s"), msg_name);
	    else
		return NA_INTEGER;
	} else
	    return (int) strlen(translateChar(string));

    } // switch
    return NA_INTEGER; // -Wall
} // R_nchar()
Ejemplo n.º 4
0
/*
 * Implementation of nchar().  The arguments, in order, are:
 *   1. x        coerced to a character vector (factors are rejected)
 *   2. type     a single string; it is matched as a non-empty prefix of
 *               "bytes", "chars" or "width"
 *   3. allowNA  logical; NA is treated as false.  When false, an element
 *               whose size cannot be computed raises an error; when true
 *               the result for that element is NA_INTEGER.
 *
 * Returns an integer vector of the same length as x carrying over its
 * names, dim and dimnames.  NA_STRING elements give 2.
 */
SEXP attribute_hidden do_nchar(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP d, s, x, stype;
    int i, len, allowNA;
    size_t ntype;
    int nc;
    const char *type;
    const char *xi;
    wchar_t *wc;
    const void *vmax;

    checkArity(op, args);
    if (isFactor(CAR(args)))
	error(_("'%s' requires a character vector"), "nchar()");
    PROTECT(x = coerceVector(CAR(args), STRSXP));
    if (!isString(x))
	error(_("'%s' requires a character vector"), "nchar()");
    len = LENGTH(x);
    stype = CADR(args);
    if (!isString(stype) || LENGTH(stype) != 1)
	error(_("invalid '%s' argument"), "type");
    type = CHAR(STRING_ELT(stype, 0)); /* always ASCII */
    ntype = strlen(type);
    if (ntype == 0) error(_("invalid '%s' argument"), "type");
    allowNA = asLogical(CADDR(args));
    if (allowNA == NA_LOGICAL) allowNA = 0;

    PROTECT(s = allocVector(INTSXP, len));
    vmax = vmaxget();
    for (i = 0; i < len; i++) {
	SEXP sxi = STRING_ELT(x, i);
	if (sxi == NA_STRING) {
	    INTEGER(s)[i] = 2;
	    continue;
	}
	/* comparing only ntype characters allows abbreviated type names */
	if (strncmp(type, "bytes", ntype) == 0) {
	    INTEGER(s)[i] = LENGTH(sxi);
	} else if (strncmp(type, "chars", ntype) == 0) {
	    if (IS_UTF8(sxi)) { /* assume this is valid */
		const char *p = CHAR(sxi);
		nc = 0;
		for( ; *p; p += utf8clen(*p)) nc++;
		INTEGER(s)[i] = nc;
	    } else if (IS_BYTES(sxi)) {
		if (!allowNA) /* could do chars 0 */
		    error(_("number of characters is not computable for element %d in \"bytes\" encoding"), i+1);
		INTEGER(s)[i] = NA_INTEGER;
	    } else if (mbcslocale) {
		/* mbstowcs(NULL, ...) only counts; a negative result
		   signals an invalid multibyte sequence */
		nc = mbstowcs(NULL, translateChar(sxi), 0);
		if (!allowNA && nc < 0)
		    error(_("invalid multibyte string %d"), i+1);
		INTEGER(s)[i] = nc >= 0 ? nc : NA_INTEGER;
	    } else
		INTEGER(s)[i] = strlen(translateChar(sxi));
	} else if (strncmp(type, "width", ntype) == 0) {
	    if (IS_UTF8(sxi)) { /* assume this is valid */
		const char *p = CHAR(sxi);
		wchar_t wc1;
		nc = 0;
		for( ; *p; p += utf8clen(*p)) {
		    utf8toucs(&wc1, p);
		    nc += Ri18n_wcwidth(wc1);
		}
		INTEGER(s)[i] = nc;
	    } else if (IS_BYTES(sxi)) {
		if (!allowNA) /* could do width 0 */
		    error(_("width is not computable for element %d in \"bytes\" encoding"), i+1);
		INTEGER(s)[i] = NA_INTEGER;
	    } else if (mbcslocale) {
		xi = translateChar(sxi);
		nc = mbstowcs(NULL, xi, 0);
		if (nc >= 0) {
		    wc = (wchar_t *) R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);

		    mbstowcs(wc, xi, nc + 1);
		    INTEGER(s)[i] = Ri18n_wcswidth(wc, 2147483647);
		    /* no positive width available: use the character count */
		    if (INTEGER(s)[i] < 1) INTEGER(s)[i] = nc;
		} else if (!allowNA)
		    /* The test used to be inverted here, raising the error
		       only when NA results were allowed; every other branch
		       errors when allowNA is false. */
		    error(_("invalid multibyte string %d"), i+1);
		else
		    INTEGER(s)[i] = NA_INTEGER;
	    } else
		INTEGER(s)[i] = strlen(translateChar(sxi));
	} else
	    error(_("invalid '%s' argument"), "type");
	vmaxset(vmax);
    }
    R_FreeStringBufferL(&cbuff);
    if ((d = getAttrib(x, R_NamesSymbol)) != R_NilValue)
	setAttrib(s, R_NamesSymbol, d);
    if ((d = getAttrib(x, R_DimSymbol)) != R_NilValue)
	setAttrib(s, R_DimSymbol, d);
    if ((d = getAttrib(x, R_DimNamesSymbol)) != R_NilValue)
	setAttrib(s, R_DimNamesSymbol, d);
    UNPROTECT(2);
    return s;
}
Ejemplo n.º 5
0
/* strlen() using escaped rather than literal form,
   and allowing for embedded nuls.
   In MBCS locales it works in characters, and reports in display width.
   Also used in printarray.c.

   str    start of the text to measure
   slen   number of bytes of str to examine
   ienc   encoding of str; CE_UTF8 selects UTF-8 decoding even when the
	  locale is not multibyte
   quote  the quote character in use: a ' or " equal to it counts as 2
	  (it will be escaped), otherwise as 1

   Returns the accumulated width: 2 for a backslash and for the listed
   control characters, 4 for anything printed in octal or for an
   undecodable byte, and for unprintable non-ASCII characters 6 or 10
   (presumably the \uxxxx and \Uxxxxxxxx forms -- confirm).
 */
attribute_hidden
int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
{
    const char *p = str;
    int len = 0, i;

    if(mbcslocale || ienc == CE_UTF8) {
	int res;
	mbstate_t mb_st;
	wchar_t wc;
	unsigned int k; /* not wint_t as it might be signed */

	/* NOTE(review): mb_st is initialised here but mbrtowc() below is
	   passed NULL, so it uses its own internal state -- confirm this
	   is intended. */
	if(ienc != CE_UTF8)  mbs_init(&mb_st);
	for (i = 0; i < slen; i++) {
	    res = (ienc == CE_UTF8) ? (int) utf8toucs(&wc, p):
		(int) mbrtowc(&wc, p, MB_CUR_MAX, NULL);
	    if(res >= 0) {
		k = wc;
		if(0x20 <= k && k < 0x7f && iswprint(wc)) {
		    switch(wc) {
		    case L'\\':
			len += 2;
			break;
		    case L'\'':
		    case L'"':
			len += (quote == *p) ? 2 : 1;
			break;
		    default:
			len++; /* assumes these are all width 1 */
			break;
		    }
		    p++;
		} else if (k < 0x80) {
		    switch(wc) {
		    case L'\a':
		    case L'\b':
		    case L'\f':
		    case L'\n':
		    case L'\r':
		    case L'\t':
		    case L'\v':
		    case L'\0':
			len += 2; break;
		    default:
			/* print in octal */
			len += 4; break;
		    }
		    p++;
		} else {
		    len += iswprint((wint_t)wc) ? Ri18n_wcwidth(wc) :
#ifdef Win32
			6;
#else
		    (k > 0xffff ? 10 : 6);
#endif
		    /* a multibyte character used res bytes: skip the rest */
		    i += (res - 1);
		    p += res;
		}
	    } else {
		/* undecodable byte: counted as an octal escape */
		len += 4;
		p++;
	    }
	}
    } else
	for (i = 0; i < slen; i++) {
	    /* ASCII */
	    if((unsigned char) *p < 0x80) {
		if(isprint((unsigned char)*p)) {
		    switch(*p) {
		    case '\\':
			len += 2; break;
		    case '\'':
		    case '"':
			len += (quote == *p)? 2 : 1; break;
		    default:
			len++; break;
		    }
		} else switch(*p) {
		    case '\a':
		    case '\b':
		    case '\f':
		    case '\n':
		    case '\r':
		    case '\t':
		    case '\v':
		    case '\0':
			len += 2; break;
		    default:
			/* print in octal */
			len += 4; break;
		    }
		p++;
	    } else { /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
		len++;
#else
		/* Must go through unsigned char: where plain char is
		   signed these bytes are negative, which isprint() is not
		   defined for (and 0xFF would collide with EOF). */
		len += isprint((unsigned char)*p) ? 1 : 4;
#endif
		p++;
	    }
	}

    return len;
}
Ejemplo n.º 6
0
/* strlen() using escaped rather than literal form.
   In MBCS locales it works in characters, and reports in display width.
   Rstrwid is also used in printarray.c.

   This supported embedded nuls when we had those.

   str    start of the text to measure
   slen   number of bytes of str to examine
   ienc   encoding of str: CE_BYTES is measured byte by byte, CE_UTF8
	  selects UTF-8 decoding even when the locale is not multibyte;
	  values above 2 trigger a warning
   quote  the quote character in use: a ', " or ` equal to it counts as
	  2 (it will be escaped), otherwise as 1

   Returns the accumulated width: 2 for a backslash and for the listed
   control characters, 4 for anything printed in octal or for an
   undecodable byte, and for unprintable non-ASCII characters 6 or 10
   (presumably the \uxxxx and \Uxxxxxxxx forms -- confirm).
 */
attribute_hidden
int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
{
    const char *p = str;
    int len = 0, i;

    if(ienc == CE_BYTES) { // not currently used for that encoding
	/* 0x20..0x7f count 1 each, every other byte counts 4 */
	for (i = 0; i < slen; i++) {
	    unsigned char k = str[i];
	    if (k >= 0x20 && k < 0x80) len += 1;
	    else len += 4;
	}
	return len;
    }
    /* Future-proof: currently that is all Rstrlen calls it with,
       and printarray has CE_NATIVE explicitly */
    if(ienc > 2) // CE_NATIVE, CE_UTF8, CE_BYTES are supported
	warning("unsupported encoding (%d) in Rstrwid", ienc);
    if(mbcslocale || ienc == CE_UTF8) {
	int res;
	mbstate_t mb_st;
	wchar_t wc;
	unsigned int k; /* not wint_t as it might be signed */

	/* NOTE(review): mb_st is initialised here but mbrtowc() below is
	   passed NULL, so it uses its own internal state -- confirm this
	   is intended. */
	if(ienc != CE_UTF8)  mbs_init(&mb_st);
	for (i = 0; i < slen; i++) {
	    res = (ienc == CE_UTF8) ? (int) utf8toucs(&wc, p):
		(int) mbrtowc(&wc, p, MB_CUR_MAX, NULL);
	    if(res >= 0) {
		k = wc;
		if(0x20 <= k && k < 0x7f && iswprint(wc)) {
		    switch(wc) {
		    case L'\\':
			len += 2;
			break;
		    case L'\'':
		    case L'"':
		    case L'`':
			len += (quote == *p) ? 2 : 1;
			break;
		    default:
			len++; /* assumes these are all width 1 */
			break;
		    }
		    p++;
		} else if (k < 0x80) {
		    switch(wc) {
		    case L'\a':
		    case L'\b':
		    case L'\f':
		    case L'\n':
		    case L'\r':
		    case L'\t':
		    case L'\v':
		    case L'\0':
			len += 2; break;
		    default:
			/* print in octal */
			len += 4; break;
		    }
		    p++;
		} else {
		    len += iswprint((wint_t)wc) ? Ri18n_wcwidth(wc) :
#ifdef Win32
			6;
#else
		    (k > 0xffff ? 10 : 6);
#endif
		    /* a multibyte character used res bytes: skip the rest */
		    i += (res - 1);
		    p += res;
		}
	    } else {
		/* undecodable byte: counted as an octal escape */
		len += 4;
		p++;
	    }
	}
    } else // not MBCS nor marked as UTF-8
	for (i = 0; i < slen; i++) {
	    if((unsigned char) *p < 0x80) {
		/* ASCII */
		if(isprint((unsigned char)*p)) {
		    switch(*p) {
		    case '\\':
			len += 2; break;
		    case '\'':
		    case '"':
		    case '`':
			len += (quote == *p)? 2 : 1; break;
		    default:
			len++; break;
		    }
		} else switch(*p) {
		    case '\a':
		    case '\b':
		    case '\f':
		    case '\n':
		    case '\r':
		    case '\t':
		    case '\v':
		    case '\0':
			len += 2; break;
		    default:
			/* print in octal */
			len += 4; break;
		    }
		p++;
	    } else { /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
		len++;
#else
		/* Must go through unsigned char: where plain char is
		   signed these bytes are negative, which isprint() is not
		   defined for (and 0xFF would collide with EOF). */
		len += isprint((unsigned char)*p) ? 1 : 4;
#endif
		p++;
	    }
	}

    return len;
}
Ejemplo n.º 7
0
/*
 * Read one line of input with interactive editing.
 *
 * prompt  text shown before the input; NULL is treated as ""
 * buf     caller-supplied buffer that receives the line
 * buflen  size of buf; stored in BUF_SIZE
 *
 * Characters are fetched with gl_getc() until a newline/return, ^D on
 * an empty line, ^Z, or a negative return from gl_getc().  Control
 * characters are dispatched to the editing commands below; everything
 * else is inserted (or fed to the incremental search when that is
 * active).
 *
 * Returns 1 when the line was ended with ^Z and 0 in every other case,
 * including the longjmp back to gl_jmp.
 *
 * NOTE(review): in the MBCS branches the step size comes from
 * Ri18n_wcwidth(), i.e. a display width is used as a byte count --
 * presumably this relies on encodings where the two agree; confirm.
 */
int
getline(const char *prompt, char *buf, int buflen)
{
    int             c, loc, tmp;
    int mb_len;
    mbstate_t mb_st;
    int i;
    wchar_t wc;

    BUF_SIZE = buflen;
    gl_buf = buf;
    gl_buf[0] = '\0';
    if (setjmp(gl_jmp)) {
       gl_newline();
       gl_cleanup(); 
       return 0;
    }
    gl_init();	
    gl_pos = 0;
    gl_prompt = (prompt)? prompt : "";
    if (gl_in_hook)
	gl_in_hook(gl_buf);
    gl_fixup(gl_prompt, -2, BUF_SIZE);
    while ((c = gl_getc()) >= 0) {
	gl_extent = 0;  	/* reset to full extent */
	if (!iscntrl(c)) {
	    if (gl_search_mode)
	       search_addchar(c);
	    else
	       gl_addchar(c);
	} else {
	    if (gl_search_mode) {
	        if (c == '\033' || c == '\016' || c == '\020') {
	            search_term();
	            c = 0;     		/* ignore the character */
		} else if (c == '\010' || c == '\177') {
		    search_addchar(-1); /* unwind search string */
		    c = 0;
		} else if (c != '\022' && c != '\023') {
		    search_term();	/* terminate and handle char */
		}
	    }
	    switch (c) {
	      case '\n': case '\r': 			/* newline */
		gl_newline();
		gl_cleanup();
		return 0;
		/*NOTREACHED*/
		break; 
	      case '\001': gl_fixup(gl_prompt, -1, 0);		/* ^A */
		break;
	      case '\002': 	/* ^B */
		if(mbcslocale) {
		    /* find the size of the character just before the cursor */
		    mb_len = 0;
		    mbs_init(&mb_st);
		    for(i = 0; i < gl_pos ;) {
			mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st);
			mb_len = Ri18n_wcwidth(wc);
			i += (wc==0) ? 0 : mb_len;
		    }
		    gl_fixup(gl_prompt, -1, gl_pos - mb_len);
		} else
		    gl_fixup(gl_prompt, -1, gl_pos-1);
		break;
	      case '\003':                                      /* ^C */
		  gl_fixup(gl_prompt, -1, gl_cnt);
		  gl_puts("^C\n");
		  gl_kill(0);
		  gl_fixup(gl_prompt, -2, BUF_SIZE);
		break;
	      case '\004':					/* ^D */
		if (gl_cnt == 0) {
		    gl_buf[0] = 0;
		    gl_cleanup();
		    gl_putc('\n');
		    return 0;
		} else {
		    gl_del(0);
		}
		break;
	      case '\005': gl_fixup(gl_prompt, -1, gl_cnt);	/* ^E */
		break;
		case '\006': /* ^F */
		  if(mbcslocale) { 
		      /* at end of line the scan below would reach a zero
			 wide character and never advance */
		      if(gl_pos >= gl_cnt) break;
		      mb_len = 0;
		      mbs_init(&mb_st);
		      for(i = 0; i<= gl_pos ;){
			  mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st);
			  mb_len = Ri18n_wcwidth(wc);
			  i += (wc==0) ? 0 : mb_len;
		      }
		      gl_fixup(gl_prompt, -1, gl_pos + mb_len);
		  }
		else
		  gl_fixup(gl_prompt, -1, gl_pos+1);
		break;
	      case '\010': case '\177': gl_del(-1);	/* ^H and DEL */
		break;
	      case '\t':        				/* TAB */
                if (gl_tab_hook) {
		    tmp = gl_pos;
	            loc = gl_tab_hook(gl_buf, gl_strlen(gl_prompt), &tmp);
	            if (loc != -1 || tmp != gl_pos)
	                gl_fixup(gl_prompt, loc, tmp);
                }
		break;
	      case '\013': gl_kill(gl_pos);			/* ^K */
		break;
	      case '\014': gl_redraw();				/* ^L */
		break;
	      case '\016': 					/* ^N */
		/* strncpy may leave the copy unterminated, hence the
		   explicit terminator */
		strncpy(gl_buf, gl_hist_next(), BUF_SIZE-2);
		gl_buf[BUF_SIZE-2] = '\0';
                if (gl_in_hook)
	            gl_in_hook(gl_buf);
		gl_fixup(gl_prompt, 0, BUF_SIZE);
		break;
	      case '\017': gl_overwrite = !gl_overwrite;       	/* ^O */
		break;
	      case '\020': 					/* ^P */
		strncpy(gl_buf, gl_hist_prev(),BUF_SIZE-2);
		gl_buf[BUF_SIZE-2] = '\0';
                if (gl_in_hook)
	            gl_in_hook(gl_buf);
		gl_fixup(gl_prompt, 0, BUF_SIZE);
		break;
	      case '\022': search_back(1);			/* ^R */
		break;
	      case '\023': search_forw(1);			/* ^S */
		break;
	      case '\024': gl_transpose();			/* ^T */
		break;
              case '\025': gl_kill(0);				/* ^U */
		break;
              case '\027': gl_killword(-1);			/* ^W */
		break;
	      case '\031': gl_yank();				/* ^Y */
		break;
	      case '\032': 					/* ^Z */
		gl_newline();
		gl_cleanup();
		return 1;
		/*NOTREACHED*/
		break;
	      case '\033':				/* ansi arrow keys */
		c = gl_getc();
		if (c == '[') {
		    switch(c = gl_getc()) {
		      case 'A':             			/* up */
		        strncpy(gl_buf, gl_hist_prev(), BUF_SIZE-2);
		        gl_buf[BUF_SIZE-2] = '\0';
		        if (gl_in_hook)
	                    gl_in_hook(gl_buf);
		        gl_fixup(gl_prompt, 0, BUF_SIZE);
		        break;
		      case 'B':                         	/* down */
		        strncpy(gl_buf, gl_hist_next(), BUF_SIZE-2);
		        gl_buf[BUF_SIZE-2] = '\0';
                        if (gl_in_hook)
	                    gl_in_hook(gl_buf);
		        gl_fixup(gl_prompt, 0, BUF_SIZE);
		        break;
		    case 'C': /* right */
			if(mbcslocale) { 
			    /* Same guard as ^F: without it, pressing right
			       at end of line reaches a zero wide character,
			       i stops advancing and the loop never ends. */
			    if(gl_pos >= gl_cnt) break;
			    mb_len = 0;
			    mbs_init(&mb_st);
			    for(i = 0; i <= gl_pos ;) {
				mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st);
				mb_len = Ri18n_wcwidth(wc);
				i += (wc==0) ? 0 : mb_len;
			    }
			    gl_fixup(gl_prompt, -1, gl_pos + mb_len);
			} else
			    gl_fixup(gl_prompt, -1, gl_pos+1);
		        break;
		    case 'D': /* left */
		       if(mbcslocale) {
			   /* Same scan as ^B: stop before gl_pos so that
			      mb_len is the size of the character to the
			      left of the cursor, not of the one under it
			      (and the terminator is never reached). */
			   mb_len = 0;
			   mbs_init(&mb_st);
			   for(i = 0; i < gl_pos ;) {
			       mbrtowc(&wc, gl_buf+i, MB_CUR_MAX, &mb_st);
			       mb_len = Ri18n_wcwidth(wc);
			       i += (wc==0) ? 0 :mb_len;
			   }
			   gl_fixup(gl_prompt, -1, gl_pos - mb_len);
		       } else
			 gl_fixup(gl_prompt, -1, gl_pos-1);
			break;
		      default: gl_putc('\007');         /* who knows */
		        break;
		    }
		} else if (c == 'f' || c == 'F') {
		    gl_word(1);
		} else if (c == 'b' || c == 'B') {
		    gl_word(-1);
		} else
		    gl_putc('\007');
		break;
	      default:		/* check for a terminal signal */
                if (c > 0)
		    gl_putc('\007');
		break;
	    }
	}
    }
    gl_newline();
    gl_cleanup();
    return 0;
}