static void substr(char *buf, const char *str, int ienc, int sa, int so) { /* Store the substring str [sa:so] into buf[] */ int i, j, used; if (ienc == CE_UTF8) { const char *end = str + strlen(str); for (i = 0; i < so && str < end; i++) { int used = utf8clen(*str); if (i < sa - 1) { str += used; continue; } for (j = 0; j < used; j++) *buf++ = *str++; } } else if (ienc == CE_LATIN1 || ienc == CE_BYTES) { for (str += (sa - 1), i = sa; i <= so; i++) *buf++ = *str++; } else { if (mbcslocale && !strIsASCII(str)) { const char *end = str + strlen(str); mbstate_t mb_st; mbs_init(&mb_st); for (i = 1; i < sa; i++) str += Mbrtowc(NULL, str, MB_CUR_MAX, &mb_st); for (i = sa; i <= so && str < end; i++) { used = (int) Mbrtowc(NULL, str, MB_CUR_MAX, &mb_st); for (j = 0; j < used; j++) *buf++ = *str++; } } else for (str += (sa - 1), i = sa; i <= so; i++) *buf++ = *str++; } *buf = '\0'; }
/* * Taken and adapted from R 3.2.2 src/main/gram.c@4915 */ int ALIKEC_is_valid_name(const char *name) { const char *p = name; int i; if(mbcslocale) { /* the only way to establish which chars are alpha etc is to use the wchar variants */ size_t n = strlen(name), used; wchar_t wc; used = Mbrtowc(&wc, p, n, NULL); if((int) used <= 0) return 0; p += used; n -= used; if (wc != L'.' && !iswalpha(wc) ) return 0; if (wc == L'.') { /* We don't care about other than ASCII digits */ if(isdigit(0xff & (int)*p)) return 0; /* Mbrtowc(&wc, p, n, NULL); if(iswdigit(wc)) return 0; */ } while((int)(used = Mbrtowc(&wc, p, n, NULL)) > 0) { if (!(iswalnum(wc) || wc == L'.' || wc == L'_')) break; p += used; n -= used; } if (*p != '\0') return 0; } else { // nocov start current local has MB_CUR_MAX > 1, so this never runs int c = 0xff & *p++; if (c != '.' && !isalpha(c) ) return 0; if (c == '.' && isdigit(0xff & (int)*p)) return 0; while ( c = 0xff & *p++, (isalnum(c) || c == '.' || c == '_') ) ; if (c != '\0') return 0; // nocov end } if (strcmp(name, "...") == 0) return 1; const char * keywords[20] = { "NULL", "NA", "TRUE", "FALSE", "Inf", "NaN", "NA_integer_", "NA_real_", "NA_character_", "NA_complex_", "function", "while", "repeat", "for", "if", "in", "else", "next", "break", "..." }; for (i = 0; i < 20; i++) if (strcmp(keywords[i], name) == 0) return 0; return 1; }
static void substrset(char *buf, const char *const str, cetype_t ienc, int sa, int so) { /* Replace the substring buf[sa:so] by str[] */ int i, in = 0, out = 0; if (ienc == CE_UTF8) { for (i = 1; i < sa; i++) buf += utf8clen(*buf); for (i = sa; i <= so && in < strlen(str); i++) { in += utf8clen(str[in]); out += utf8clen(buf[out]); if (!str[in]) break; } if (in != out) memmove(buf+in, buf+out, strlen(buf+out)+1); memcpy(buf, str, in); } else if (ienc == CE_LATIN1 || ienc == CE_BYTES) { in = (int) strlen(str); out = so - sa + 1; memcpy(buf + sa - 1, str, (in < out) ? in : out); } else { /* This cannot work for stateful encodings */ if (mbcslocale) { for (i = 1; i < sa; i++) buf += Mbrtowc(NULL, buf, MB_CUR_MAX, NULL); /* now work out how many bytes to replace by how many */ for (i = sa; i <= so && in < strlen(str); i++) { in += (int) Mbrtowc(NULL, str+in, MB_CUR_MAX, NULL); out += (int) Mbrtowc(NULL, buf+out, MB_CUR_MAX, NULL); if (!str[in]) break; } if (in != out) memmove(buf+in, buf+out, strlen(buf+out)+1); memcpy(buf, str, in); } else { in = (int) strlen(str); out = so - sa + 1; memcpy(buf + sa - 1, str, (in < out) ? in : out); } } }
/* We need to handle \ in paths which are to be passed to R code. Since these can include \\ for network drives, we cannot just use /, although we did prior to R 2.4.0. MBCS-aware since 2.4.0. */ static void double_backslashes(char *s, char *out) { char *p = s; int i; if(mbcslocale) { mbstate_t mb_st; int used; mbs_init(&mb_st); while((used = Mbrtowc(NULL, p, MB_CUR_MAX, &mb_st))) { if(*p == '\\') *out++ = '\\'; for(i = 0; i < used; i++) *out++ = *p++; } } else for (; *p; p++) if (*p == '\\') {*out++ = *p; *out++ = *p;} else *out++ = *p; *out = '\0'; }
SEXP attribute_hidden do_makenames(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP arg, ans; R_xlen_t i, n; int l, allow_; char *p, *tmp = NULL, *cbuf; const char *This; Rboolean need_prefix; const void *vmax; checkArity(op ,args); arg = CAR(args); if (!isString(arg)) error(_("non-character names")); n = XLENGTH(arg); allow_ = asLogical(CADR(args)); if (allow_ == NA_LOGICAL) error(_("invalid '%s' value"), "allow_"); PROTECT(ans = allocVector(STRSXP, n)); vmax = vmaxget(); for (i = 0 ; i < n ; i++) { This = translateChar(STRING_ELT(arg, i)); l = (int) strlen(This); /* need to prefix names not beginning with alpha or ., as well as . followed by a number */ need_prefix = FALSE; if (mbcslocale && This[0]) { int nc = l, used; wchar_t wc; mbstate_t mb_st; const char *pp = This; mbs_init(&mb_st); used = (int) Mbrtowc(&wc, pp, MB_CUR_MAX, &mb_st); pp += used; nc -= used; if (wc == L'.') { if (nc > 0) { Mbrtowc(&wc, pp, MB_CUR_MAX, &mb_st); if (iswdigit(wc)) need_prefix = TRUE; } } else if (!iswalpha(wc)) need_prefix = TRUE; } else { if (This[0] == '.') { if (l >= 1 && isdigit(0xff & (int) This[1])) need_prefix = TRUE; } else if (!isalpha(0xff & (int) This[0])) need_prefix = TRUE; } if (need_prefix) { tmp = Calloc(l+2, char); strcpy(tmp, "X"); strcat(tmp, translateChar(STRING_ELT(arg, i))); } else { tmp = Calloc(l+1, char); strcpy(tmp, translateChar(STRING_ELT(arg, i))); } if (mbcslocale) { /* This cannot lengthen the string, so safe to overwrite it. Would also be possible a char at a time. */ int nc = (int) mbstowcs(NULL, tmp, 0); wchar_t *wstr = Calloc(nc+1, wchar_t), *wc; if (nc >= 0) { mbstowcs(wstr, tmp, nc+1); for (wc = wstr; *wc; wc++) { if (*wc == L'.' || (allow_ && *wc == L'_')) /* leave alone */; else if (!iswalnum((int)*wc)) *wc = L'.'; /* If it changes into dot here, * length will become short on mbcs. * The name which became short will contain garbage. * cf. * > make.names(c("\u30fb")) * [1] "X.\0" */ } wcstombs(tmp, wstr, strlen(tmp)+1); Free(wstr); } else error(_("invalid multibyte string %d"), i+1); } else { for (p = tmp; *p; p++) {