Beispiel #1
0
static void substr(char *buf, const char *str, int ienc, int sa, int so)
{
/* Store the substring	str [sa:so]  into buf[] */
    int i, j, used;

    if (ienc == CE_UTF8) {
	const char *end = str + strlen(str);
	for (i = 0; i < so && str < end; i++) {
	    int used = utf8clen(*str);
	    if (i < sa - 1) { str += used; continue; }
	    for (j = 0; j < used; j++) *buf++ = *str++;
	}
    } else if (ienc == CE_LATIN1 || ienc == CE_BYTES) {
	for (str += (sa - 1), i = sa; i <= so; i++) *buf++ = *str++;
    } else {
	if (mbcslocale && !strIsASCII(str)) {
	    const char *end = str + strlen(str);
	    mbstate_t mb_st;
	    mbs_init(&mb_st);
	    for (i = 1; i < sa; i++) str += Mbrtowc(NULL, str, MB_CUR_MAX, &mb_st);
	    for (i = sa; i <= so && str < end; i++) {
		used = (int) Mbrtowc(NULL, str, MB_CUR_MAX, &mb_st);
		for (j = 0; j < used; j++) *buf++ = *str++;
	    }
	} else
	    for (str += (sa - 1), i = sa; i <= so; i++) *buf++ = *str++;
    }
    *buf = '\0';
}
Beispiel #2
0
/*
 * Taken and adapted from R 3.2.2 src/main/gram.c@4915
 */
int ALIKEC_is_valid_name(const char *name)
{
  const char *p = name;
  int i;

  if(mbcslocale) {
    /* the only way to establish which chars are alpha etc is to
       use the wchar variants */
    size_t n = strlen(name), used;
    wchar_t wc;
    used = Mbrtowc(&wc, p, n, NULL);
    if((int) used <= 0) return 0;
    p += used; n -= used;
    if (wc != L'.' && !iswalpha(wc) ) return 0;
    if (wc == L'.') {
      /* We don't care about other than ASCII digits */
      if(isdigit(0xff & (int)*p)) return 0;
      /* Mbrtowc(&wc, p, n, NULL); if(iswdigit(wc)) return 0; */
    }
    while((int)(used = Mbrtowc(&wc, p, n, NULL)) > 0) {
      if (!(iswalnum(wc) || wc == L'.' || wc == L'_')) break;
      p += used; n -= used;
    }
    if (*p != '\0') return 0;
  } else {
    // nocov start current local has MB_CUR_MAX > 1, so this never runs
    int c = 0xff & *p++;
    if (c != '.' && !isalpha(c) ) return 0;
    if (c == '.' && isdigit(0xff & (int)*p)) return 0;
    while ( c = 0xff & *p++, (isalnum(c) || c == '.' || c == '_') ) ;
    if (c != '\0') return 0;
    // nocov end
  }

  if (strcmp(name, "...") == 0) return 1;

  const char * keywords[20] = {
    "NULL", "NA", "TRUE", "FALSE", "Inf", "NaN", "NA_integer_", "NA_real_",
    "NA_character_", "NA_complex_", "function", "while", "repeat", "for",
    "if", "in", "else", "next", "break", "..."
  };
  for (i = 0; i < 20; i++)
    if (strcmp(keywords[i], name) == 0) return 0;

  return 1;
}
Beispiel #3
0
static void
substrset(char *buf, const char *const str, cetype_t ienc, int sa, int so)
{
    /* Replace the substring buf[sa:so] by str[] */
    int i, in = 0, out = 0;

    if (ienc == CE_UTF8) {
	for (i = 1; i < sa; i++) buf += utf8clen(*buf);
	for (i = sa; i <= so && in < strlen(str); i++) {
	    in +=  utf8clen(str[in]);
	    out += utf8clen(buf[out]);
	    if (!str[in]) break;
	}
	if (in != out) memmove(buf+in, buf+out, strlen(buf+out)+1);
	memcpy(buf, str, in);
    } else if (ienc == CE_LATIN1 || ienc == CE_BYTES) {
	in = (int) strlen(str);
	out = so - sa + 1;
	memcpy(buf + sa - 1, str, (in < out) ? in : out);
    } else {
	/* This cannot work for stateful encodings */
	if (mbcslocale) {
	    for (i = 1; i < sa; i++) buf += Mbrtowc(NULL, buf, MB_CUR_MAX, NULL);
	    /* now work out how many bytes to replace by how many */
	    for (i = sa; i <= so && in < strlen(str); i++) {
		in += (int) Mbrtowc(NULL, str+in, MB_CUR_MAX, NULL);
		out += (int) Mbrtowc(NULL, buf+out, MB_CUR_MAX, NULL);
		if (!str[in]) break;
	    }
	    if (in != out) memmove(buf+in, buf+out, strlen(buf+out)+1);
	    memcpy(buf, str, in);
	} else {
	    in = (int) strlen(str);
	    out = so - sa + 1;
	    memcpy(buf + sa - 1, str, (in < out) ? in : out);
	}
    }
}
Beispiel #4
0
/* We need to handle \ in paths which are to be passed to R code.
   Since these can include \\ for network drives, we cannot just use /,
   although we did prior to R 2.4.0.

   MBCS-aware since 2.4.0.
 */
static void double_backslashes(char *s, char *out)
{
    char *p = s;

    int i;
    if(mbcslocale) {
	mbstate_t mb_st; int used;
	mbs_init(&mb_st);
	while((used = Mbrtowc(NULL, p, MB_CUR_MAX, &mb_st))) {
	    if(*p == '\\') *out++ = '\\';
	    for(i = 0; i < used; i++) *out++ = *p++;
	}
    } else
	for (; *p; p++)
	    if (*p == '\\') {*out++ = *p; *out++ = *p;} else *out++ = *p;
    *out = '\0';
}
Beispiel #5
0
SEXP attribute_hidden do_makenames(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP arg, ans;
    R_xlen_t i, n;
    int l, allow_;
    char *p, *tmp = NULL, *cbuf;
    const char *This;
    Rboolean need_prefix;
    const void *vmax;

    checkArity(op ,args);
    arg = CAR(args);
    if (!isString(arg))
	error(_("non-character names"));
    n = XLENGTH(arg);
    allow_ = asLogical(CADR(args));
    if (allow_ == NA_LOGICAL)
	error(_("invalid '%s' value"), "allow_");
    PROTECT(ans = allocVector(STRSXP, n));
    vmax = vmaxget();
    for (i = 0 ; i < n ; i++) {
	This = translateChar(STRING_ELT(arg, i));
	l = (int) strlen(This);
	/* need to prefix names not beginning with alpha or ., as
	   well as . followed by a number */
	need_prefix = FALSE;
	if (mbcslocale && This[0]) {
	    int nc = l, used;
	    wchar_t wc;
	    mbstate_t mb_st;
	    const char *pp = This;
	    mbs_init(&mb_st);
	    used = (int) Mbrtowc(&wc, pp, MB_CUR_MAX, &mb_st);
	    pp += used; nc -= used;
	    if (wc == L'.') {
		if (nc > 0) {
		    Mbrtowc(&wc, pp, MB_CUR_MAX, &mb_st);
		    if (iswdigit(wc))  need_prefix = TRUE;
		}
	    } else if (!iswalpha(wc)) need_prefix = TRUE;
	} else {
	    if (This[0] == '.') {
		if (l >= 1 && isdigit(0xff & (int) This[1])) need_prefix = TRUE;
	    } else if (!isalpha(0xff & (int) This[0])) need_prefix = TRUE;
	}
	if (need_prefix) {
	    tmp = Calloc(l+2, char);
	    strcpy(tmp, "X");
	    strcat(tmp, translateChar(STRING_ELT(arg, i)));
	} else {
	    tmp = Calloc(l+1, char);
	    strcpy(tmp, translateChar(STRING_ELT(arg, i)));
	}
	if (mbcslocale) {
	    /* This cannot lengthen the string, so safe to overwrite it.
	       Would also be possible a char at a time.
	     */
	    int nc = (int) mbstowcs(NULL, tmp, 0);
	    wchar_t *wstr = Calloc(nc+1, wchar_t), *wc;
	    if (nc >= 0) {
		mbstowcs(wstr, tmp, nc+1);
		for (wc = wstr; *wc; wc++) {
		    if (*wc == L'.' || (allow_ && *wc == L'_'))
			/* leave alone */;
		    else if (!iswalnum((int)*wc)) *wc = L'.';
		    /* If it changes into dot here,
		     * length will become short on mbcs.
		     * The name which became short will contain garbage.
		     * cf.
		     *   >  make.names(c("\u30fb"))
		     *   [1] "X.\0"
		     */
		}
		wcstombs(tmp, wstr, strlen(tmp)+1);
		Free(wstr);
	    } else error(_("invalid multibyte string %d"), i+1);
	} else {
	    for (p = tmp; *p; p++) {