Beispiel #1
0
int R_nchar(SEXP string, nchar_type type_,
	    Rboolean allowNA, Rboolean keepNA, const char* msg_name)
{
    if (string == NA_STRING)
	return keepNA ? NA_INTEGER : 2;
    // else :
    switch(type_) {
    case Bytes:
	return LENGTH(string);
	break;
    case Chars:
	if (IS_UTF8(string)) {
	    const char *p = CHAR(string);
	    if (!utf8Valid(p)) {
		if (!allowNA)
		    error(_("invalid multibyte string, %s"), msg_name);
		return NA_INTEGER;
	    } else {
		int nc = 0;
		for( ; *p; p += utf8clen(*p)) nc++;
		return nc;
	    }
	} else if (IS_BYTES(string)) {
	    if (!allowNA) /* could do chars 0 */
		error(_("number of characters is not computable in \"bytes\" encoding, %s"),
		      msg_name);
	    return NA_INTEGER;
	} else if (mbcslocale) {
	    int nc = (int) mbstowcs(NULL, translateChar(string), 0);
	    if (!allowNA && nc < 0)
		error(_("invalid multibyte string, %s"), msg_name);
	    return (nc >= 0 ? nc : NA_INTEGER);
	} else
	    return ((int) strlen(translateChar(string)));
	break;
    case Width:
	if (IS_UTF8(string)) {
	    const char *p = CHAR(string);
	    if (!utf8Valid(p)) {
		if (!allowNA)
		    error(_("invalid multibyte string, %s"), msg_name);
		return NA_INTEGER;
	    } else {
		wchar_t wc1;
		int nc = 0;
		for( ; *p; p += utf8clen(*p)) {
		    utf8toucs(&wc1, p);
		    nc += Ri18n_wcwidth(wc1);
		}
		return nc;
	    }
	} else if (IS_BYTES(string)) {
	    if (!allowNA) /* could do width 0 */
		error(_("width is not computable for %s in \"bytes\" encoding"),
		      msg_name);
	    return NA_INTEGER;
	} else if (mbcslocale) {
	    const char *xi = translateChar(string);
	    int nc = (int) mbstowcs(NULL, xi, 0);
	    if (nc >= 0) {
		const void *vmax = vmaxget();
		wchar_t *wc = (wchar_t *)
		    R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);
		mbstowcs(wc, xi, nc + 1);
		int nci18n = Ri18n_wcswidth(wc, 2147483647);
		vmaxset(vmax);
		return (nci18n < 1) ? nc : nci18n;
	    } else if (allowNA)
		error(_("invalid multibyte string, %s"), msg_name);
	    else
		return NA_INTEGER;
	} else
	    return (int) strlen(translateChar(string));

    } // switch
    return NA_INTEGER; // -Wall
} // R_nchar()
Beispiel #2
0
SEXP attribute_hidden do_nchar(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP d, s, x, stype;
    int i, len, allowNA;
    size_t ntype;
    int nc;
    const char *type;
    const char *xi;
    wchar_t *wc;
    const void *vmax;

    checkArity(op, args);
    if (isFactor(CAR(args)))
	error(_("'%s' requires a character vector"), "nchar()");
    PROTECT(x = coerceVector(CAR(args), STRSXP));
    if (!isString(x))
	error(_("'%s' requires a character vector"), "nchar()");
    len = LENGTH(x);
    stype = CADR(args);
    if (!isString(stype) || LENGTH(stype) != 1)
	error(_("invalid '%s' argument"), "type");
    type = CHAR(STRING_ELT(stype, 0)); /* always ASCII */
    ntype = strlen(type);
    if (ntype == 0) error(_("invalid '%s' argument"), "type");
    allowNA = asLogical(CADDR(args));
    if (allowNA == NA_LOGICAL) allowNA = 0;

    PROTECT(s = allocVector(INTSXP, len));
    vmax = vmaxget();
    for (i = 0; i < len; i++) {
	SEXP sxi = STRING_ELT(x, i);
	if (sxi == NA_STRING) {
	    INTEGER(s)[i] = 2;
	    continue;
	}
	if (strncmp(type, "bytes", ntype) == 0) {
	    INTEGER(s)[i] = LENGTH(sxi);
	} else if (strncmp(type, "chars", ntype) == 0) {
	    if (IS_UTF8(sxi)) { /* assume this is valid */
		const char *p = CHAR(sxi);
		nc = 0;
		for( ; *p; p += utf8clen(*p)) nc++;
		INTEGER(s)[i] = nc;
	    } else if (IS_BYTES(sxi)) {
		if (!allowNA) /* could do chars 0 */
		    error(_("number of characters is not computable for element %d in \"bytes\" encoding"), i+1);
		INTEGER(s)[i] = NA_INTEGER;
	    } else if (mbcslocale) {
		nc = mbstowcs(NULL, translateChar(sxi), 0);
		if (!allowNA && nc < 0)
		    error(_("invalid multibyte string %d"), i+1);
		INTEGER(s)[i] = nc >= 0 ? nc : NA_INTEGER;
	    } else
		INTEGER(s)[i] = strlen(translateChar(sxi));
	} else if (strncmp(type, "width", ntype) == 0) {
	    if (IS_UTF8(sxi)) { /* assume this is valid */
		const char *p = CHAR(sxi);
		wchar_t wc1;
		nc = 0;
		for( ; *p; p += utf8clen(*p)) {
		    utf8toucs(&wc1, p);
		    nc += Ri18n_wcwidth(wc1);
		}
		INTEGER(s)[i] = nc;
	    } else if (IS_BYTES(sxi)) {
		if (!allowNA) /* could do width 0 */
		    error(_("width is not computable for element %d in \"bytes\" encoding"), i+1);
		INTEGER(s)[i] = NA_INTEGER;
	    } else if (mbcslocale) {
		xi = translateChar(sxi);
		nc = mbstowcs(NULL, xi, 0);
		if (nc >= 0) {
		    wc = (wchar_t *) R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);

		    mbstowcs(wc, xi, nc + 1);
		    INTEGER(s)[i] = Ri18n_wcswidth(wc, 2147483647);
		    if (INTEGER(s)[i] < 1) INTEGER(s)[i] = nc;
		} else if (allowNA)
		    error(_("invalid multibyte string %d"), i+1);
		else
		    INTEGER(s)[i] = NA_INTEGER;
	    } else
		INTEGER(s)[i] = strlen(translateChar(sxi));
	} else
	    error(_("invalid '%s' argument"), "type");
	vmaxset(vmax);
    }
    R_FreeStringBufferL(&cbuff);
    if ((d = getAttrib(x, R_NamesSymbol)) != R_NilValue)
	setAttrib(s, R_NamesSymbol, d);
    if ((d = getAttrib(x, R_DimSymbol)) != R_NilValue)
	setAttrib(s, R_DimSymbol, d);
    if ((d = getAttrib(x, R_DimNamesSymbol)) != R_NilValue)
	setAttrib(s, R_DimNamesSymbol, d);
    UNPROTECT(2);
    return s;
}