Example #1
0
/* Operates on three CHARSXP (internal) arguments. Returns STRSXP vector.
 * Returned strings alternate between "outside" and "inside" blocks.
 * E.g. "Four .(score) and seven .(years)"
 * returns c("Four ", "score", " and seven ", "years")
 */
SEXP find_subst_expressions(SEXP str, SEXP begin_delim, SEXP end_delim) {
  const char *s, *b, *e;
  const char *p, *start;
  const char *before, *begin, *end, *after;
  cetype_t enc;
  step_t step;
  int nBlocks, i;
  SEXP out;

  assert_type(str, CHARSXP);
  assert_type(begin_delim, CHARSXP);
  assert_type(end_delim, CHARSXP);

  step = get_stepper(str, &s, &enc);

  if (getCharCE(begin_delim) != enc) {
    b = Rf_reEnc(CHAR(begin_delim), getCharCE(begin_delim), enc, 0);
  } else {
    b = CHAR(begin_delim);
  }

  if (getCharCE(end_delim) != enc) {
    e = Rf_reEnc(CHAR(end_delim), getCharCE(end_delim), enc, 0);
  } else {
    e = CHAR(end_delim);
  }

  /* Scan once to count how many blocks we need, then scan again (ugh) */
  nBlocks = 0;
  p = s;
  while(p && *p) {
    if ( (p = block_search(p, b, e, NULL, NULL, NULL, NULL, step)) ) {
      nBlocks++;
    }
  }

  out = PROTECT(allocVector(STRSXP, 2*nBlocks+1));
  start = s;
  p = s;
  for (i = 0; i < nBlocks; i++) {
    p = block_search(p, b, e,
                     &before, &begin, &end, &after,
                     step);
    /* extract leading unescaped block, then escaped block */
    if (p) {
      SET_STRING_ELT(out, 2*i, mkCharLenCE(start, before-start, enc));
      SET_STRING_ELT(out, 2*i+1, mkCharLenCE(begin, end-begin, enc));
      start = after;
    }
  }
  /* then the rest of the string. */
  SET_STRING_ELT(out, 2*i, mkCharCE(start, enc));

  UNPROTECT(1);
  return out;
}
Example #2
0
SEXP R_stri_length(SEXP vec) {
  int vec_len = LENGTH(vec);
  SEXP ret = PROTECT(allocVector(INTSXP, vec_len));
  int* retint = INTEGER(ret);

  for (int i = 0; i < vec_len; i++) {
    SEXP str = STRING_ELT(vec, i);
    if (str == NA_STRING) {
      retint[i] = NA_INTEGER;
      continue;
    }

    int str_len = LENGTH(str);
    if (getCharCE(str) == CE_LATIN1 || (getCharCE(str) == CE_NATIVE && getNativeCE() == CE_LATIN1)) {
      retint[i] = str_len;
    } else if (getCharCE(str) == CE_BYTES) {
      UNPROTECT(1);
      error("Invalid encoding: bytes.");
    } else if (getCharCE(str) == CE_UTF8 || (getCharCE(str) == CE_NATIVE && getNativeCE() == CE_UTF8)) {
      UChar32 out = 0;
      const char* source = CHAR(str);
      R_len_t j = 0;
      int count;
      for (count = 0; out >= 0 && j < str_len; count++) {
        U8_NEXT(source, j, str_len, out); // faster that U8_FWD_1 & gives bad UChar32s
      }
      if (out < 0) {
        warning("Invalid UTF8 string: %s", source);
        retint[i] = NA_INTEGER;
      } else {
        retint[i] = count;
      }
    } else if (native_is_singlebyte()) { // native-8bit
      retint[i] = str_len;
    } else {
      // native encoding, not 8 bit
      UErrorCode status = U_ZERO_ERROR;
      UConverter* conv = ucnv_open(NULL, &status);
      const char* source = CHAR(str);
      const char* sourceLimit = source + str_len;
      int j;
      for (j = 0; source != sourceLimit; j++) {
        ucnv_getNextUChar(conv, &source, sourceLimit, &status);
      }
      retint[i] = j; // all right, we got it!
    }
  }

  UNPROTECT(1);
  return ret;
}
Example #3
0
/* Uses R_alloc but called by a .Internal.  Result may be R_alloc-ed */
static const char *trChar(SEXP x)
{
    size_t n = strlen(CHAR(x));
    cetype_t ienc = getCharCE(x);

    if (ienc == CE_BYTES) {
	const char *p = CHAR(x), *q;
	char *pp = R_alloc(4*n+1, 1), *qq = pp, buf[5];
	for (q = p; *q; q++) {
	    unsigned char k = (unsigned char) *q;
	    if (k >= 0x20 && k < 0x80) {
		*qq++ = *q;
	    } else {
		snprintf(buf, 5, "\\x%02x", k);
		for(int j = 0; j < 4; j++) *qq++ = buf[j];
	    }
	}
	*qq = '\0';
	return pp;
    } else {
#ifdef Win32
	static char buf[106];
	char *p;
	/* Long strings will be rare, and few per cat() call so we
	   can afford to be profligate here: translateChar is */
	if (n < 100) p = buf; else p = R_alloc(n+7, 1);
	if (WinUTF8out && ienc == CE_UTF8) {
	    strcpy(p, UTF8in); strcat(p, CHAR(x)); strcat(p, UTF8out);
	    return p;
	} else
#endif
	    return translateChar(x);
    }
}
Example #4
0
SEXP attribute_hidden do_substr(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP s, x, sa, so, el;
    R_xlen_t i, len;
    int start, stop, k, l;
    size_t slen;
    cetype_t ienc;
    const char *ss;
    char *buf;

    checkArity(op, args);
    x = CAR(args);
    sa = CADR(args);
    so = CADDR(args);
    k = LENGTH(sa);
    l = LENGTH(so);

    if (!isString(x))
	error(_("extracting substrings from a non-character object"));
    len = XLENGTH(x);
    PROTECT(s = allocVector(STRSXP, len));
    if (len > 0) {
	if (!isInteger(sa) || !isInteger(so) || k == 0 || l == 0)
	    error(_("invalid substring arguments"));

	for (i = 0; i < len; i++) {
	    start = INTEGER(sa)[i % k];
	    stop = INTEGER(so)[i % l];
	    el = STRING_ELT(x,i);
	    if (el == NA_STRING || start == NA_INTEGER || stop == NA_INTEGER) {
		SET_STRING_ELT(s, i, NA_STRING);
		continue;
	    }
	    ienc = getCharCE(el);
	    ss = CHAR(el);
	    slen = strlen(ss); /* FIXME -- should handle embedded nuls */
	    buf = R_AllocStringBuffer(slen+1, &cbuff);
	    if (start < 1) start = 1;
	    if (start > stop || start > slen) {
		buf[0] = '\0';
	    } else {
		if (stop > slen) stop = (int) slen;
		substr(buf, ss, ienc, start, stop);
	    }
	    SET_STRING_ELT(s, i, mkCharCE(buf, ienc));
	}
	R_FreeStringBufferL(&cbuff);
    }
    DUPLICATE_ATTRIB(s, x);
    /* This copied the class, if any */
    UNPROTECT(1);
    return s;
}
Example #5
0
/* Match what EncodeString does with encodings */
attribute_hidden
int Rstrlen(SEXP s, int quote)
{
    cetype_t ienc = getCharCE(s);
    if (ienc == CE_UTF8 || ienc == CE_BYTES)
	return Rstrwid(CHAR(s), LENGTH(s), ienc, quote);
    const void *vmax = vmaxget();
    const char *p = translateChar(s);
    int len = Rstrwid(p, (int)strlen(p), CE_NATIVE, quote);
    vmaxset(vmax);
    return len;
}
Example #6
0
// formerly in src/main/platform.c
SEXP fileedit(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP fn, ti, ed;
    const char **f, **title, *editor;
    int i, n;
    const void *vmax = vmaxget();

    args = CDR(args);
    fn = CAR(args); args = CDR(args);
    ti = CAR(args); args = CDR(args);
    ed = CAR(args);

    n = length(fn);
    if (!isString(ed) || length(ed) != 1)
	error(_("invalid '%s' specification"), "editor");
    if (n > 0) {
	if (!isString(fn))
	    error(_("invalid '%s' specification"), "filename");
	f = (const char**) R_alloc(n, sizeof(char*));
	title = (const char**) R_alloc(n, sizeof(char*));
	/* FIXME convert to UTF-8 on Windows */
	for (i = 0; i < n; i++) {
	    SEXP el = STRING_ELT(fn, 0);
	    if (!isNull(el))
#ifdef Win32
		f[i] = acopy_string(reEnc(CHAR(el), getCharCE(el), CE_UTF8, 1));
#else
		f[i] = acopy_string(translateChar(el));
#endif
	    else
		f[i] = "";
	    if (!isNull(STRING_ELT(ti, i)))
		title[i] = acopy_string(translateChar(STRING_ELT(ti, i)));
	    else
		title[i] = "";
	}
    }
Example #7
0
File: extra.c Project: csilles/cxxr
/* utils::shortPathName */
SEXP in_shortpath(SEXP paths)
{
    SEXP ans, el;
    int i, n = LENGTH(paths);
    char tmp[MAX_PATH];
    wchar_t wtmp[32768];
    DWORD res;
    const void *vmax = vmaxget();

    if(!isString(paths)) error(_("'path' must be a character vector"));

    PROTECT(ans = allocVector(STRSXP, n));
    for (i = 0; i < n; i++) {
	el = STRING_ELT(paths, i);
	if(getCharCE(el) == CE_UTF8) {
	    res = GetShortPathNameW(filenameToWchar(el, FALSE), wtmp, 32768);
	    if (res && res <= 32768)
		wcstoutf8(tmp, wtmp, wcslen(wtmp)+1);
	    else
		strcpy(tmp, translateChar(el));
	    /* documented to return paths using \, which the API call does
	       not necessarily do */
	    R_fixbackslash(tmp);
	    SET_STRING_ELT(ans, i, mkCharCE(tmp, CE_UTF8));
	} else {
	    res = GetShortPathName(translateChar(el), tmp, MAX_PATH);
	    if (res == 0 || res > MAX_PATH) strcpy(tmp, translateChar(el));
	    /* documented to return paths using \, which the API call does
	       not necessarily do */
	    R_fixbackslash(tmp);
	    SET_STRING_ELT(ans, i, mkChar(tmp));
	}
    }
    UNPROTECT(1);
    vmaxset(vmax);
    return ans;
}
Example #8
0
File: extra.c Project: csilles/cxxr
	/* GetNativeSystemInfo is XP or later */
	pGNSI = (PGNSI)
	    GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
			   "GetNativeSystemInfo");
	if(NULL != pGNSI) pGNSI(&si); else GetSystemInfo(&si);
	if(si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64)
	    strcat(ver, " x64");
    }
    SET_STRING_ELT(ans, 1, mkChar(ver));

    if((int)osvi.dwMajorVersion >= 5) {
	if(osvi.wServicePackMajor > 0)
	    snprintf(ver, 256, "build %d, Service Pack %d",
		     LOWORD(osvi.dwBuildNumber),
		     (int) osvi.wServicePackMajor);
	else snprintf(ver, 256, "build %d", LOWORD(osvi.dwBuildNumber));
    } else
	snprintf(ver, 256, "build %d, %s",
		 LOWORD(osvi.dwBuildNumber), osvi.szCSDVersion);
    SET_STRING_ELT(ans, 2, mkChar(ver));
    GetComputerNameW(name, &namelen);
    wcstoutf8(buf, name, 1000);
    SET_STRING_ELT(ans, 3, mkCharCE(buf, CE_UTF8));
#ifdef WIN64
    SET_STRING_ELT(ans, 4, mkChar("x86-64"));
#else
    SET_STRING_ELT(ans, 4, mkChar("x86"));
#endif
    GetUserNameW(user, &userlen);
    wcstoutf8(buf, user, 1000);
    SET_STRING_ELT(ans, 5, mkCharCE(buf, CE_UTF8));
    SET_STRING_ELT(ans, 6, STRING_ELT(ans, 5));
    SET_STRING_ELT(ans, 7, STRING_ELT(ans, 5));
    PROTECT(ansnames = allocVector(STRSXP, 8));
    SET_STRING_ELT(ansnames, 0, mkChar("sysname"));
    SET_STRING_ELT(ansnames, 1, mkChar("release"));
    SET_STRING_ELT(ansnames, 2, mkChar("version"));
    SET_STRING_ELT(ansnames, 3, mkChar("nodename"));
    SET_STRING_ELT(ansnames, 4, mkChar("machine"));
    SET_STRING_ELT(ansnames, 5, mkChar("login"));
    SET_STRING_ELT(ansnames, 6, mkChar("user"));
    SET_STRING_ELT(ansnames, 7, mkChar("effective_user"));
    setAttrib(ans, R_NamesSymbol, ansnames);
    UNPROTECT(2);
    return ans;
}

SEXP do_syssleep(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    DWORD mtime;
    int ntime;
    double time;

    checkArity(op, args);
    time = asReal(CAR(args));
    if (ISNAN(time) || time < 0)
	errorcall(call, _("invalid '%s' value"), "time");
    ntime = 1000*(time) + 0.5;
    while (ntime > 0) {
	mtime = min(500, ntime);
	ntime -= mtime;
	Sleep(mtime);
	R_ProcessEvents();
    }
    return R_NilValue;
}

#ifdef LEA_MALLOC
#define MALLINFO_FIELD_TYPE size_t
struct mallinfo {
    MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
    MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
    MALLINFO_FIELD_TYPE smblks;   /* number of fastbin blocks */
    MALLINFO_FIELD_TYPE hblks;    /* number of mmapped regions */
    MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
    MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
    MALLINFO_FIELD_TYPE fsmblks;  /* space available in freed fastbin blocks */
    MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
    MALLINFO_FIELD_TYPE fordblks; /* total free space */
    MALLINFO_FIELD_TYPE keepcost; /* top-most, releasable (via malloc_trim) space */
};
extern R_size_t R_max_memory;

struct mallinfo mallinfo(void);
#endif 

SEXP in_memsize(SEXP ssize)
{
    SEXP ans;
    int maxmem = NA_LOGICAL;

    if(isLogical(ssize)) 
	maxmem = asLogical(ssize);
    else if(isReal(ssize)) {
	R_size_t newmax;
	double mem = asReal(ssize);
	if (!R_FINITE(mem))
	    error(_("incorrect argument"));
#ifdef LEA_MALLOC
#ifndef WIN64
	if(mem >= 4096)
	    error(_("don't be silly!: your machine has a 4Gb address limit"));
#endif
	newmax = mem * 1048576.0;
	if (newmax < R_max_memory)
	    warning(_("cannot decrease memory limit: ignored"));
	else
	    R_max_memory = newmax;
#endif
    } else
	error(_("incorrect argument"));
	
    PROTECT(ans = allocVector(REALSXP, 1));
#ifdef LEA_MALLOC
    if(maxmem == NA_LOGICAL)
	REAL(ans)[0] = R_max_memory;
    else if(maxmem)
	REAL(ans)[0] = mallinfo().usmblks;
    else
	REAL(ans)[0] = mallinfo().uordblks;
    REAL(ans)[0] /= 1048576.0;
#else
    REAL(ans)[0] = NA_REAL;
#endif
    UNPROTECT(1);
    return ans;
}

SEXP do_dllversion(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP path = R_NilValue, ans;
    const wchar_t *dll;
    DWORD dwVerInfoSize;
    DWORD dwVerHnd;

    checkArity(op, args);
    path = CAR(args);
    if(!isString(path) || LENGTH(path) != 1)
	errorcall(call, _("invalid '%s' argument"), "path");
    dll = filenameToWchar(STRING_ELT(path, 0), FALSE);
    dwVerInfoSize = GetFileVersionInfoSizeW(dll, &dwVerHnd);
    PROTECT(ans = allocVector(STRSXP, 2));
    SET_STRING_ELT(ans, 0, mkChar(""));
    SET_STRING_ELT(ans, 1, mkChar(""));
    if (dwVerInfoSize) {
	BOOL  fRet;
	LPSTR lpstrVffInfo;
	LPSTR lszVer = NULL;
	UINT  cchVer = 0;

	lpstrVffInfo = (LPSTR) malloc(dwVerInfoSize);
	if (GetFileVersionInfoW(dll, 0L, dwVerInfoSize, lpstrVffInfo)) {

	    fRet = VerQueryValue(lpstrVffInfo,
				 TEXT("\\StringFileInfo\\040904E4\\FileVersion"),
				 (LPVOID)&lszVer, &cchVer);
	    if(fRet) SET_STRING_ELT(ans, 0, mkChar(lszVer));

	    fRet = VerQueryValue(lpstrVffInfo,
				 TEXT("\\StringFileInfo\\040904E4\\R Version"),
				 (LPVOID)&lszVer, &cchVer);
	    if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer));
	    else {
		fRet = VerQueryValue(lpstrVffInfo,
				     TEXT("\\StringFileInfo\\040904E4\\Compiled under R Version"),
				     (LPVOID)&lszVer, &cchVer);
		if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer));
	    }

	} else ans = R_NilValue;
	free(lpstrVffInfo);
    } else ans = R_NilValue;
    UNPROTECT(1);
    return ans;
}



int Rwin_rename(const char *from, const char *to)
{
    return (MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0);
}

int Rwin_wrename(const wchar_t *from, const wchar_t *to)
{
    return (MoveFileExW(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0);
}


const char *formatError(DWORD res)
{
    static char buf[1000], *p;
    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
		  NULL, res,
		  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
		  buf, 1000, NULL);
    p = buf+strlen(buf) -1;
    if(*p == '\n') *p = '\0';
    p = buf+strlen(buf) -1;
    if(*p == '\r') *p = '\0';
    p = buf+strlen(buf) -1;
    if(*p == '.') *p = '\0';
    return buf;
}


void R_UTF8fixslash(char *s); /* from main/util.c */
SEXP do_normalizepath(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP ans, paths = CAR(args), el, slash;
    int i, n = LENGTH(paths), res;
    char tmp[MAX_PATH], longpath[MAX_PATH], *tmp2;
    wchar_t wtmp[32768], wlongpath[32768], *wtmp2;
    int mustWork, fslash = 0;

    checkArity(op, args);
    if(!isString(paths))
	errorcall(call, _("'path' must be a character vector"));

    slash = CADR(args);
    if(!isString(slash) || LENGTH(slash) != 1)
	errorcall(call, "'winslash' must be a character string");
    const char *sl = CHAR(STRING_ELT(slash, 0));
    if (strcmp(sl, "/") && strcmp(sl, "\\"))
	errorcall(call, "'winslash' must be '/' or '\\\\'");
    if (strcmp(sl, "/") == 0) fslash = 1;
    
    mustWork = asLogical(CADDR(args));

    PROTECT(ans = allocVector(STRSXP, n));
    for (i = 0; i < n; i++) {
    	int warn = 0;
    	SEXP result;
	el = STRING_ELT(paths, i);
	result = el;
	if(getCharCE(el) == CE_UTF8) {
	    if ((res = GetFullPathNameW(filenameToWchar(el, FALSE), 32768, 
					wtmp, &wtmp2)) && res <= 32768) {
		if ((res = GetLongPathNameW(wtmp, wlongpath, 32768))
		    && res <= 32768) {
	    	    wcstoutf8(longpath, wlongpath, wcslen(wlongpath)+1);
		    if(fslash) R_UTF8fixslash(longpath);
	    	    result = mkCharCE(longpath, CE_UTF8);
		} else if(mustWork == 1) {
		    errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			      translateChar(el), 
			      formatError(GetLastError()));	
	    	} else {
	    	    wcstoutf8(tmp, wtmp, wcslen(wtmp)+1);
		    if(fslash) R_UTF8fixslash(tmp);
	    	    result = mkCharCE(tmp, CE_UTF8);
	    	    warn = 1;
	    	}
	    } else if(mustWork == 1) {
		errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			  translateChar(el), 
			  formatError(GetLastError()));	
	    } else {
		if (fslash) {
		    strcpy(tmp, translateCharUTF8(el));
		    R_UTF8fixslash(tmp);
	    	    result = mkCharCE(tmp, CE_UTF8);
		}
	    	warn = 1;
	    }
	    if (warn && (mustWork == NA_LOGICAL))
	    	warningcall(call, "path[%d]=\"%ls\": %s", i+1, 
			    filenameToWchar(el,FALSE), 
			    formatError(GetLastError()));
	} else {
	    if ((res = GetFullPathName(translateChar(el), MAX_PATH, tmp, &tmp2)) 
		&& res <= MAX_PATH) {
	    	if ((res = GetLongPathName(tmp, longpath, MAX_PATH))
		    && res <= MAX_PATH) {
		    if(fslash) R_fixslash(longpath);
	    	    result = mkChar(longpath);
		} else if(mustWork == 1) {
		    errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			      translateChar(el), 
			      formatError(GetLastError()));	
	    	} else {
		    if(fslash) R_fixslash(tmp);
	    	    result = mkChar(tmp);
	    	    warn = 1;
	    	}
	    } else if(mustWork == 1) {
		errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			  translateChar(el), 
			  formatError(GetLastError()));	
	    } else {
		if (fslash) {
		    strcpy(tmp, translateChar(el));
		    R_fixslash(tmp);
		    result = mkChar(tmp);
		}
	    	warn = 1;
	    }
	    if (warn && (mustWork == NA_LOGICAL))
		warningcall(call, "path[%d]=\"%s\": %s", i+1, 
			    translateChar(el), 
			    formatError(GetLastError()));	
	}
	SET_STRING_ELT(ans, i, result);
    }
    UNPROTECT(1);
    return ans;
}
Example #9
0
const char *EncodeString(SEXP s, int w, int quote, Rprt_adj justify)
{
    int b, b0, i, j, cnt;
    const char *p; char *q, buf[11];
    cetype_t ienc = CE_NATIVE;

    /* We have to do something like this as the result is returned, and
       passed on by EncodeElement -- so no way could be end user be
       responsible for freeing it.  However, this is not thread-safe. */

    static R_StringBuffer gBuffer = {NULL, 0, BUFSIZE};
    R_StringBuffer *buffer = &gBuffer;

    if (s == NA_STRING) {
	p = quote ? CHAR(R_print.na_string) : CHAR(R_print.na_string_noquote);
	cnt = i = (int)(quote ? strlen(CHAR(R_print.na_string)) :
			strlen(CHAR(R_print.na_string_noquote)));
	quote = 0;
    } else {
#ifdef Win32
	if(WinUTF8out) {
	    ienc = getCharCE(s);
	    if(ienc == CE_UTF8) {
		p = CHAR(s);
		i = Rstrlen(s, quote);
		cnt = LENGTH(s);
	    } else {
		p = translateChar0(s);
		if(p == CHAR(s)) {
		    i = Rstrlen(s, quote);
		    cnt = LENGTH(s);
		} else {
		    cnt = strlen(p);
		    i = Rstrwid(p, cnt, CE_NATIVE, quote);
		}
		ienc = CE_NATIVE;
	    }
	} else
#endif
	{
	    if(IS_BYTES(s)) {
		p = CHAR(s);
		cnt = (int) strlen(p);
		const char *q;
		char *pp = R_alloc(4*cnt+1, 1), *qq = pp, buf[5];
		for (q = p; *q; q++) {
		    unsigned char k = (unsigned char) *q;
		    if (k >= 0x20 && k < 0x80) {
			*qq++ = *q;
			if (quote && *q == '"') cnt++;
		    } else {
			snprintf(buf, 5, "\\x%02x", k);
			for(j = 0; j < 4; j++) *qq++ = buf[j];
			cnt += 3;
		    }
		}
		*qq = '\0';
		p = pp;
		i = cnt;
	    } else {
		p = translateChar(s);
		if(p == CHAR(s)) {
		    i = Rstrlen(s, quote);
		    cnt = LENGTH(s);
		} else {
		    cnt = (int) strlen(p);
		    i = Rstrwid(p, cnt, CE_NATIVE, quote);
		}
	    }
	}
    }

    /* We need enough space for the encoded string, including escapes.
       Octal encoding turns one byte into four.
       \u encoding can turn a multibyte into six or ten,
       but it turns 2/3 into 6, and 4 (and perhaps 5/6) into 10.
       Let's be wasteful here (the worst case appears to be an MBCS with
       one byte for an upper-plane Unicode point output as ten bytes,
       but I doubt that such an MBCS exists: two bytes is plausible).

       +2 allows for quotes, +6 for UTF_8 escapes.
     */
    q = R_AllocStringBuffer(imax2(5*cnt+8, w), buffer);
    b = w - i - (quote ? 2 : 0); /* total amount of padding */
    if(justify == Rprt_adj_none) b = 0;
    if(b > 0 && justify != Rprt_adj_left) {
	b0 = (justify == Rprt_adj_centre) ? b/2 : b;
	for(i = 0 ; i < b0 ; i++) *q++ = ' ';
	b -= b0;
    }
    if(quote) *q++ = (char) quote;
    if(mbcslocale || ienc == CE_UTF8) {
	int j, res;
	mbstate_t mb_st;
	wchar_t wc;
	unsigned int k; /* not wint_t as it might be signed */
#ifndef __STDC_ISO_10646__
	Rboolean Unicode_warning = FALSE;
#endif
	if(ienc != CE_UTF8)  mbs_init(&mb_st);
#ifdef Win32
	else if(WinUTF8out) { memcpy(q, UTF8in, 3); q += 3; }
#endif
	for (i = 0; i < cnt; i++) {
	    res = (int)((ienc == CE_UTF8) ? utf8toucs(&wc, p):
			mbrtowc(&wc, p, MB_CUR_MAX, NULL));
	    if(res >= 0) { /* res = 0 is a terminator */
		k = wc;
		/* To be portable, treat \0 explicitly */
		if(res == 0) {k = 0; wc = L'\0';}
		if(0x20 <= k && k < 0x7f && iswprint(wc)) {
		    switch(wc) {
		    case L'\\': *q++ = '\\'; *q++ = '\\'; p++;
			break;
		    case L'\'':
		    case L'"':
			if(quote == *p)  *q++ = '\\'; *q++ = *p++;
			break;
		    default:
			for(j = 0; j < res; j++) *q++ = *p++;
			break;
		    }
		} else if (k < 0x80) {
		    /* ANSI Escapes */
		    switch(wc) {
		    case L'\a': *q++ = '\\'; *q++ = 'a'; break;
		    case L'\b': *q++ = '\\'; *q++ = 'b'; break;
		    case L'\f': *q++ = '\\'; *q++ = 'f'; break;
		    case L'\n': *q++ = '\\'; *q++ = 'n'; break;
		    case L'\r': *q++ = '\\'; *q++ = 'r'; break;
		    case L'\t': *q++ = '\\'; *q++ = 't'; break;
		    case L'\v': *q++ = '\\'; *q++ = 'v'; break;
		    case L'\0': *q++ = '\\'; *q++ = '0'; break;

		    default:
			/* print in octal */
			snprintf(buf, 5, "\\%03o", k);
			for(j = 0; j < 4; j++) *q++ = buf[j];
			break;
		    }
		    p++;
		} else {
		    if(iswprint(wc)) {
			/* The problem here is that wc may be
			   printable according to the Unicode tables,
			   but it may not be printable on the output
			   device concerned. */
			for(j = 0; j < res; j++) *q++ = *p++;
		    } else {
#ifndef Win32
# ifndef __STDC_ISO_10646__
			Unicode_warning = TRUE;
# endif
			if(k > 0xffff)
			    snprintf(buf, 11, "\\U%08x", k);
			else
#endif
			    snprintf(buf, 11, "\\u%04x", k);
			j = (int) strlen(buf);
			memcpy(q, buf, j);
			q += j;
			p += res;
		    }
		    i += (res - 1);
		}

	    } else { /* invalid char */
		snprintf(q, 5, "\\x%02x", *((unsigned char *)p));
		q += 4; p++;
	    }
	}
#ifndef __STDC_ISO_10646__
	if(Unicode_warning)
	    warning(_("it is not known that wchar_t is Unicode on this platform"));
#endif

    } else
	for (i = 0; i < cnt; i++) {

	    /* ASCII */
	    if((unsigned char) *p < 0x80) {
		if(*p != '\t' && isprint((int)*p)) { /* Windows has \t as printable */
		    switch(*p) {
		    case '\\': *q++ = '\\'; *q++ = '\\'; break;
		    case '\'':
		    case '"':
			if(quote == *p)  *q++ = '\\'; *q++ = *p; break;
		    default: *q++ = *p; break;
		    }
		} else switch(*p) {
			/* ANSI Escapes */
		    case '\a': *q++ = '\\'; *q++ = 'a'; break;
		    case '\b': *q++ = '\\'; *q++ = 'b'; break;
		    case '\f': *q++ = '\\'; *q++ = 'f'; break;
		    case '\n': *q++ = '\\'; *q++ = 'n'; break;
		    case '\r': *q++ = '\\'; *q++ = 'r'; break;
		    case '\t': *q++ = '\\'; *q++ = 't'; break;
		    case '\v': *q++ = '\\'; *q++ = 'v'; break;
		    case '\0': *q++ = '\\'; *q++ = '0'; break;

		    default:
			/* print in octal */
			snprintf(buf, 5, "\\%03o", (unsigned char) *p);
			for(j = 0; j < 4; j++) *q++ = buf[j];
			break;
		    }
		p++;
	    } else {  /* 8 bit char */
#ifdef Win32 /* It seems Windows does not know what is printable! */
		*q++ = *p++;
#else
		if(!isprint((int)*p & 0xff)) {
		    /* print in octal */
		    snprintf(buf, 5, "\\%03o", (unsigned char) *p);
		    for(j = 0; j < 4; j++) *q++ = buf[j];
		    p++;
		} else *q++ = *p++;
#endif
	    }
	}

#ifdef Win32
    if(WinUTF8out && ienc == CE_UTF8)  { memcpy(q, UTF8out, 3); q += 3; }
#endif
    if(quote) *q++ = (char) quote;
    if(b > 0 && justify != Rprt_adj_right) {
	for(i = 0 ; i < b ; i++) *q++ = ' ';
    }
    *q = '\0';
    return buffer->data;
}
Example #10
0
SEXP attribute_hidden do_substrgets(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP s, x, sa, so, value, el, v_el;
    R_xlen_t i, len;
    int start, stop, k, l, v;
    size_t slen;
    cetype_t ienc, venc;
    const char *ss, *v_ss;
    char *buf;
    const void *vmax;

    checkArity(op, args);
    x = CAR(args);
    sa = CADR(args);
    so = CADDR(args);
    value = CADDDR(args);
    k = LENGTH(sa);
    l = LENGTH(so);

    if (!isString(x))
	error(_("replacing substrings in a non-character object"));
    len = LENGTH(x);
    PROTECT(s = allocVector(STRSXP, len));
    if (len > 0) {
	if (!isInteger(sa) || !isInteger(so) || k == 0 || l == 0)
	    error(_("invalid substring arguments"));

	v = LENGTH(value);
	if (!isString(value) || v == 0) error(_("invalid value"));

	vmax = vmaxget();
	for (i = 0; i < len; i++) {
	    el = STRING_ELT(x, i);
	    v_el = STRING_ELT(value, i % v);
	    start = INTEGER(sa)[i % k];
	    stop = INTEGER(so)[i % l];
	    if (el == NA_STRING || v_el == NA_STRING ||
		start == NA_INTEGER || stop == NA_INTEGER) {
		SET_STRING_ELT(s, i, NA_STRING);
		continue;
	    }
	    ienc = getCharCE(el);
	    ss = CHAR(el);
	    slen = strlen(ss);
	    if (start < 1) start = 1;
	    if (stop > slen) stop = (int) slen; /* SBCS optimization */
	    if (start > stop) {
		/* just copy element across */
		SET_STRING_ELT(s, i, STRING_ELT(x, i));
	    } else {
		int ienc2 = ienc;
		v_ss = CHAR(v_el);
		/* is the value in the same encoding?
		   FIXME: could prefer UTF-8 here
		 */
		venc = getCharCE(v_el);
		if (venc != ienc && !strIsASCII(v_ss)) {
		    ss = translateChar(el);
		    slen = strlen(ss);
		    v_ss = translateChar(v_el);
		    ienc2 = CE_NATIVE;
		}
		/* might expand under MBCS */
		buf = R_AllocStringBuffer(slen+strlen(v_ss), &cbuff);
		strcpy(buf, ss);
		substrset(buf, v_ss, ienc2, start, stop);
		SET_STRING_ELT(s, i, mkCharCE(buf, ienc2));
	    }
	    vmaxset(vmax);
	}
	R_FreeStringBufferL(&cbuff);
    }
    UNPROTECT(1);
    return s;
}
Example #11
0
SEXP do_system(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    rpipe *fp;
    char  buf[INTERN_BUFSIZE];
    const char *fout = "", *ferr = "";
    int   vis = 0, flag = 2, i = 0, j, ll = 0;
    SEXP  cmd, fin, Stdout, Stderr, tlist = R_NilValue, tchar, rval;
    int timeout = 0, timedout = 0;

    checkArity(op, args);
    cmd = CAR(args);
    if (!isString(cmd) || LENGTH(cmd) != 1)
	errorcall(call, _("character string expected as first argument"));
    args = CDR(args);
    flag = asInteger(CAR(args)); args = CDR(args);
    if (flag >= 20) {vis = -1; flag -= 20;}
    else if (flag >= 10) {vis = 0; flag -= 10;}
    else vis = 1;

    fin = CAR(args);
    if (!isString(fin))
	errorcall(call, _("character string expected as third argument"));
    args = CDR(args);
    Stdout = CAR(args);
    args = CDR(args);
    Stderr = CAR(args);
    args = CDR(args);
    timeout = asInteger(CAR(args));
    if (timeout == NA_INTEGER || timeout < 0 || timeout > 2000000)
	/* the limit could be increased, but not much as in milliseconds it
	   has to fit into a 32-bit unsigned integer */
	errorcall(call, _("invalid '%s' argument"), "timeout");
    if (timeout && !flag)
	errorcall(call, "Timeout with background running processes is not supported.");

    if (CharacterMode == RGui) {
	/* This is a rather conservative approach: if
	   Rgui is launched from a console window it does have
	   standard handles -- but users might well not expect that.
	*/
	SetStdHandle(STD_INPUT_HANDLE, INVALID_HANDLE_VALUE);
	SetStdHandle(STD_OUTPUT_HANDLE, INVALID_HANDLE_VALUE);
	SetStdHandle(STD_ERROR_HANDLE, INVALID_HANDLE_VALUE);
	if (TYPEOF(Stdout) == STRSXP) fout = CHAR(STRING_ELT(Stdout, 0));
	if (TYPEOF(Stderr) == STRSXP) ferr = CHAR(STRING_ELT(Stderr, 0));
    } else {
	if (flag == 2) flag = 1; /* ignore std.output.on.console */
	if (TYPEOF(Stdout) == STRSXP) fout = CHAR(STRING_ELT(Stdout, 0));
	else if (asLogical(Stdout) == 0) fout = NULL;
	if (TYPEOF(Stderr) == STRSXP) ferr = CHAR(STRING_ELT(Stderr, 0));
	else if (asLogical(Stderr) == 0) ferr = NULL;
    }

    if (flag < 2) { /* Neither intern = TRUE nor
		       show.output.on.console for Rgui */
	ll = runcmd_timeout(CHAR(STRING_ELT(cmd, 0)),
		    getCharCE(STRING_ELT(cmd, 0)),
		    flag, vis, CHAR(STRING_ELT(fin, 0)), fout, ferr,
		    timeout, &timedout);
    } else {
	/* read stdout +/- stderr from pipe */
	int m = 0;
	if(flag == 2 /* show on console */ || CharacterMode == RGui) m = 3;
	if(TYPEOF(Stderr) == LGLSXP)
	    m = asLogical(Stderr) ? 2 : 0;
	if(m  && TYPEOF(Stdout) == LGLSXP && asLogical(Stdout)) m = 3;
	fp = rpipeOpen(CHAR(STRING_ELT(cmd, 0)), getCharCE(STRING_ELT(cmd, 0)),
		       vis, CHAR(STRING_ELT(fin, 0)), m, fout, ferr, timeout);
	if (!fp) {
	    /* If intern = TRUE generate an error */
	    if (flag == 3) error(runerror());
	    ll = NOLAUNCH;
	} else {
	    /* FIXME: use REPROTECT */
	    if (flag == 3) {
		PROTECT(tlist);
		/* honour intern = FALSE, ignore.stdout = TRUE */
		if (m > 0 ||
		    (!(TYPEOF(Stdout) == LGLSXP && !asLogical(Stdout))))
		    for (i = 0; rpipeGets(fp, buf, INTERN_BUFSIZE); i++) {
			ll = strlen(buf) - 1;
			if ((ll >= 0) && (buf[ll] == '\n')) buf[ll] = '\0';
			tchar = mkChar(buf);
			UNPROTECT(1); /* tlist */
			PROTECT(tlist = CONS(tchar, tlist));
		    }

	    } else {
		for (i = 0; rpipeGets(fp, buf, INTERN_BUFSIZE); i++)
		    R_WriteConsole(buf, strlen(buf));
	    }
	    ll = rpipeClose(fp, &timedout);
	}
    }
    if (timedout) {
	ll = 124;
	warningcall(R_NilValue, _("command '%s' timed out"),
	            CHAR(STRING_ELT(cmd, 0)));
    } else if (flag == 3 && ll) {
	warningcall(R_NilValue, 
		    _("running command '%s' had status %d"), 
		    CHAR(STRING_ELT(cmd, 0)), ll);
    }
    if (flag == 3) { /* intern = TRUE: convert pairlist to list */
	PROTECT(rval = allocVector(STRSXP, i));
	for (j = (i - 1); j >= 0; j--) {
	    SET_STRING_ELT(rval, j, CAR(tlist));
	    tlist = CDR(tlist);
	}
	if(ll) {
	    SEXP lsym = install("status");
	    setAttrib(rval, lsym, ScalarInteger(ll));
	}
	UNPROTECT(2);
	return rval;
    } else {
	rval = ScalarInteger(ll);
	R_Visible = 0;
	return rval;
    }
}
Example #12
0
SEXP getfmts(SEXP format)
{
    int cnt, v, nfmt;
    char fmt[MAXLINE+1], bit[MAXLINE+1];
    const char *formatString;
    size_t n, cur, chunk, maxlen = 0;

    int nthis, nstar;
    Rboolean use_UTF8;
    
    SEXP res = PROTECT(allocVector(STRSXP, MAXNARGS));
    
#define SET_RESULT(n, s) {						\
     if (n >= MAXNARGS) error(_("only %d arguments are allowed"), MAXNARGS); \
	maxlen = (n) < maxlen ? maxlen : (n) + 1;			\
	SET_STRING_ELT(res, (n), mkChar(s));				\
    }
    
    if (!isString(format)) error(_("'fmt' is not a character vector"));
    nfmt = LENGTH(format);
    if (nfmt != 1) 
        error(_("'fmt' must be length 1"));

    use_UTF8 = getCharCE(STRING_ELT(format, 0)) == CE_UTF8;
    formatString = TRANSLATE_CHAR(format, 0);
    n = strlen(formatString);
    if (n > MAXLINE)
	error(_("'fmt' length exceeds maximal format length %d"), MAXLINE);
    /* process the format string */
    for (cur = 0, cnt = 0; cur < n; cur += chunk) {
	const char *curFormat = formatString + cur;
	char *starc;
	if (formatString[cur] == '%') { /* handle special format command */

	    if (cur < n - 1 && formatString[cur + 1] == '%') {
		/* take care of %% in the format */
		chunk = 2;
		strcpy(bit, "%");
	    }
	    else {
		/* recognise selected types from Table B-1 of K&R */
		/* NB: we deal with "%%" in branch above. */
		/* This is MBCS-OK, as we are in a format spec */
		    
		/*  Include formats c, u, p and n as well as the R formats; this needs to match */
		/*  C code as well */
		chunk = strcspn(curFormat + 1, "diosfeEgGxXaAcupn") + 2;
		if (cur + chunk > n)
		    error(_("unrecognised format specification '%s'"), curFormat);

		strncpy(fmt, curFormat, chunk);
		fmt[chunk] = '\0';

		nthis = -1;
		/* now look for %n$ or %nn$ form */
		if (strlen(fmt) > 3 && fmt[1] >= '1' && fmt[1] <= '9') {
		    v = fmt[1] - '0';
		    if(fmt[2] == '$') {
			nthis = v-1;
			memmove(fmt+1, fmt+3, strlen(fmt)-2);
		    } else if(fmt[2] >= '0' && fmt[2] <= '9' && fmt[3] == '$') {
			v = 10*v + fmt[2] - '0';
			nthis = v-1;
			memmove(fmt+1, fmt+4, strlen(fmt)-3);
		    }
		}

		starc = Rf_strchr(fmt, '*');
		if (starc) { /* handle  *  format if present */
		    nstar = -1;
		    if (strlen(starc) > 3 && starc[1] >= '1' && starc[1] <= '9') {
			v = starc[1] - '0';
			if(starc[2] == '$') {
			    nstar = v-1;
			    memmove(starc+1, starc+3, strlen(starc)-2);
			} else if(starc[2] >= '0' && starc[2] <= '9'
				  && starc[3] == '$') {
			    v = 10*v + starc[2] - '0';
			    nstar = v-1;
			    memmove(starc+1, starc+4, strlen(starc)-3);
			}
		    }

		    if(nstar < 0) {
			nstar = cnt++;
		    }

		    if (Rf_strchr(starc+1, '*'))
			error(_("at most one asterisk '*' is supported in each conversion specification"));

		    SET_RESULT(nstar, "*");

		}

		if (fmt[strlen(fmt) - 1] == '%') {
		} else {
		    if(nthis < 0) {
			nthis = cnt++;
		    }
		    SET_RESULT(nthis, fmt);
		}
	    }
	}
	else { /* not '%' : handle string part */
	    char *ch = Rf_strchr(curFormat, '%'); /* MBCS-aware version used */
	    chunk = (ch) ? (size_t) (ch - curFormat) : strlen(curFormat);
	    strncpy(bit, curFormat, chunk);
	    bit[chunk] = '\0';
	}
    }  /* end for ( each chunk ) */

    res = xlengthgets(res, maxlen);
    UNPROTECT(1);
    return res;
}
Example #13
0
// count strings x, their prefixes or suffixes
// and return counts greater than lower.
//
// note that for string counting the number of
// nodes usually will be much greater than the
// number of result strings. however, we could
// not determine a better upper bound without 
// traversing the tree.
SEXP R_utf8CountString(SEXP x, SEXP R_n, SEXP R_lower, SEXP R_type,
		       SEXP R_verbose, SEXP R_persistent, SEXP R_useBytes) {
    if (!persistent && rpn) {
	cpnfree(rpn);
	rpn = 0;
	warning("cleaning up stale state");
    }
    if (isNull(x) || TYPEOF(x) != VECSXP)
	error("'x' not of type list");
    if (isNull(R_n) || TYPEOF(R_n) != INTSXP)
	error("'n' not of type integer");
    if (isNull(R_lower) || TYPEOF(R_lower) != INTSXP)
	error("'lower' not of type integer");
    if (isNull(R_type) || TYPEOF(R_type) != INTSXP)
	error("'type' not of type integer");
    if (isNull(R_verbose) || TYPEOF(R_verbose) != LGLSXP)
	error("'verbose' not of type logical");
    if (isNull(R_persistent) || TYPEOF(R_persistent) != LGLSXP)
	error("'persistent' not of type logical");
    if (isNull(R_useBytes) || TYPEOF(R_useBytes) != LGLSXP)
	error("'useBytes' not of type logical");
    long l, n = 0;
    int h, i, j, k, type;
    const unsigned char *c;
    SEXP r, s;

    if (!persistent) {
	known_to_be_utf8   = utf8locale();
	known_to_be_latin1 = latin1locale();
	use_bytes          = *LOGICAL(R_useBytes);
    } else
    if (use_bytes != *LOGICAL(R_useBytes))
	error("change of useBytes in persistent mode");
    else
    if (known_to_be_utf8   != utf8locale() ||
        known_to_be_latin1 != latin1locale())
	error_reset("change of locale in persistent mode");
    persistent = LOGICAL(R_persistent)[0];

    if (!persistent) {
	tcnt = INTEGER(R_lower)[0];
	if (tcnt < 0)
	    error_reset("'lower' invalid value");
    }

    type = INTEGER(R_type)[0];

    switch (type) {
	case 0:				// strings
	    inc = 0;
	    break;
	case 1:				// prefixes
	case 2:				// suffixes
	case 3:
	    n = INTEGER(R_n)[0];
	    if (n < 0) 
		error_reset("'n' invalid value");
	    if (n == 0)
		return R_NilValue;
	    inc = 1;
	    break;
	default:
	    error_reset("'type' invalid value");
    }
#ifdef _TIME_H
    clock_t t2, t1, t0 = clock();
    if (LOGICAL(R_verbose)[0] == TRUE) {
	Rprintf("counting ...");
#ifdef __DEBUG
	Rprintf("\n");
#endif
    }
#endif

    nap = 0;

    for (i = 0; i < LENGTH(x); i++) {
	r = VECTOR_ELT(x, i);
	if (TYPEOF(r) != STRSXP)
	    error_reset("not of type character");
	for (j = 0; j < LENGTH(r); j++) {
	    s = STRING_ELT(r, j);
	    l = LENGTH(s);
	    if (s == NA_STRING || !l)
		continue;
#ifdef __TRANSLATE
	    if (!use_bytes) {
		c = (const unsigned char *) translateChar(s);
		l = strlen((const char *) c);
	    } else
		c = (const unsigned char *) CHAR(s);
#else
	    c = (const unsigned char *) CHAR(s);
#endif
	    if (!use_bytes &&
		known_to_be_utf8 &&
		tau_pcre_valid_utf8(c, l) >= 0)
		error_reset("not a valid UTF-8 string");
	    if (type > 1) {
		if (reverse_copy_utf8(c, l, n) >= 0)
		    error_reset("cannot copy string to buffer");
	    } else {
		if (type < 1)
		    n = l;
		h = 0;
		for (k = 0; k < l; k++) {
		    if (c[k] == '\0')
			continue;
		    if (k < __CBUF_SIZE - 1)
			cbuf[k] = c[k];
		    else 
			error_reset("cannot copy string to buffer");
		    if (use_bytes ||
			!known_to_be_utf8 ||
			(c[k] & 0xC0) != 0x80) {
			h++;
			if (h > n)
			    break;
		    }
		}
		cbuf[k] = 0;
	    }
#ifdef __DEBUG
	    Rprintf(" %s\n", cbuf);
#endif
	    h = nap + 1;
	    lpn = 0;
	    rpn = cpncount(rpn, cbuf);

	    if (nap != h)
		error_reset("cannot add string to tree");

	    if (!inc) {
		if (lpn)	    // should never be NULL
		    lpn->count++;
	    }
	}
	R_CheckUserInterrupt();
    }
#ifdef _TIME_H
    t1 = clock();
    if (LOGICAL(R_verbose)[0] == TRUE) {
	Rprintf(" %i string(s) using %i nodes [%.2fs]\n", nap, ncpn,
		((double) t1 - t0) / CLOCKS_PER_SEC);
	if (!persistent)
	    Rprintf("writing  ...");
#ifdef __DEBUG
	Rprintf("\n");
#endif
    }
#endif
    if (persistent)
	return R_NilValue;

    nap  = enc = 0;
    rval = PROTECT(allocVector(INTSXP, ncpn));
    setAttrib(rval, R_NamesSymbol, (nval = allocVector(STRSXP, ncpn)));

    cpnretprefix(rpn, 0);

    if (ncpn) {
	cpnfree(rpn);
	rpn = 0;
	error_reset("cannot retrieve count(s)");
    }
    rpn = 0;

    // reverse the reversed strings
    if (type == 2)
	for (i = 0; i < nap; i++) {
	    s = STRING_ELT(nval, i);
	    reverse_copy_utf8((const unsigned char *) CHAR(s), LENGTH(s), -1);
	    SET_STRING_ELT(nval, i, mkCharCE((const char *) cbuf, 
					     getCharCE(s)));
	}
#ifdef _TIME_H
    t2 = clock();
    if (LOGICAL(R_verbose)[0] == TRUE)
	Rprintf(" %i strings [%.2fs]\n", nap,
		((double) t2 - t1) / CLOCKS_PER_SEC); 
#endif
    // reduce
    if (nap < LENGTH(rval)) {
	r = PROTECT(allocVector(INTSXP, nap));
	setAttrib(r, R_NamesSymbol, (s = allocVector(STRSXP, nap)));
	while (nap-- > 0) {
	    INTEGER(r)[nap] = INTEGER(rval)[nap];
	    SET_STRING_ELT(s, nap, STRING_ELT(nval, nap));
	}
	UNPROTECT(2);

	return r;
    }
    UNPROTECT(1);

    return rval;
}
Example #14
0
/* Calculate the bounding rectangle for a string.
 * x and y assumed to be in INCHES.
 */
void textRect(double x, double y, SEXP text, int i,
	      const pGEcontext gc,
	      double xadj, double yadj,
	      double rot, pGEDevDesc dd, LRect *r)
{
    /* NOTE that we must work in inches for the angles to be correct
     */
    LLocation bl, br, tr, tl;
    LLocation tbl, tbr, ttr, ttl;
    LTransform thisLocation, thisRotation, thisJustification;
    LTransform tempTransform, transform;
    double w, h;
    if (isExpression(text)) {
	SEXP expr = VECTOR_ELT(text, i % LENGTH(text));
	w = fromDeviceWidth(GEExpressionWidth(expr, gc, dd),
			    GE_INCHES, dd);
	h = fromDeviceHeight(GEExpressionHeight(expr, gc, dd),
			     GE_INCHES, dd);
    } else {
	const char* string = CHAR(STRING_ELT(text, i % LENGTH(text)));
	w = fromDeviceWidth(GEStrWidth(string,
				       (gc->fontface == 5) ? CE_SYMBOL :
				       getCharCE(STRING_ELT(text, i % LENGTH(text))),
				       gc, dd),
			    GE_INCHES, dd);
	h = fromDeviceHeight(GEStrHeight(string,
					 (gc->fontface == 5) ? CE_SYMBOL :
					 getCharCE(STRING_ELT(text, i % LENGTH(text))),
					 gc, dd),
			     GE_INCHES, dd);
    }
    location(0, 0, bl);
    location(w, 0, br);
    location(w, h, tr);
    location(0, h, tl);
    translation(-xadj*w, -yadj*h, thisJustification);
    translation(x, y, thisLocation);
    if (rot != 0)
	rotation(rot, thisRotation);
    else
	identity(thisRotation);
    /* Position relative to origin of rotation THEN rotate.
     */
    multiply(thisJustification, thisRotation, tempTransform);
    /* Translate to (x, y)
     */
    multiply(tempTransform, thisLocation, transform);
    trans(bl, transform, tbl);
    trans(br, transform, tbr);
    trans(tr, transform, ttr);
    trans(tl, transform, ttl);
    rect(locationX(tbl), locationX(tbr), locationX(ttr), locationX(ttl),
	 locationY(tbl), locationY(tbr), locationY(ttr), locationY(ttl),
	 r);
    /* For debugging, the following prints out an R statement to draw the
     * bounding box
     */
    /*
    Rprintf("\ngrid.lines(c(%f, %f, %f, %f, %f), c(%f, %f, %f, %f, %f), default.units=\"inches\")\n",
	locationX(tbl), locationX(tbr), locationX(ttr), locationX(ttl),
	 locationX(tbl),
	 locationY(tbl), locationY(tbr), locationY(ttr), locationY(ttl),
	 locationY(tbl)
	 );
    */
}
Example #15
0
SEXP attribute_hidden do_sprintf(SEXP call, SEXP op, SEXP args, SEXP env)
{
    int i, nargs, cnt, v, thislen, nfmt, nprotect = 0;
    /* fmt2 is a copy of fmt with '*' expanded.
       bit will hold numeric formats and %<w>s, so be quite small. */
    char fmt[MAXLINE+1], fmt2[MAXLINE+10], *fmtp, bit[MAXLINE+1],
	*outputString;
    const char *formatString;
    size_t n, cur, chunk;

    SEXP format, _this, a[MAXNARGS], ans /* -Wall */ = R_NilValue;
    int ns, maxlen, lens[MAXNARGS], nthis, nstar, star_arg = 0;
    static R_StringBuffer outbuff = {NULL, 0, MAXELTSIZE};
    Rboolean has_star, use_UTF8;

#define _my_sprintf(_X_)						\
    {									\
	int nc = snprintf(bit, MAXLINE+1, fmtp, _X_);			\
	if (nc > MAXLINE)						\
	    error(_("required resulting string length %d is greater than maximal %d"), \
		  nc, MAXLINE);						\
    }

    nargs = length(args);
    /* grab the format string */
    format = CAR(args);
    if (!isString(format))
	error(_("'fmt' is not a character vector"));
    nfmt = length(format);
    if (nfmt == 0) return allocVector(STRSXP, 0);
    args = CDR(args); nargs--;
    if(nargs >= MAXNARGS)
	error(_("only %d arguments are allowed"), MAXNARGS);

    /* record the args for possible coercion and later re-ordering */
    for(i = 0; i < nargs; i++, args = CDR(args)) {
	SEXPTYPE t_ai;
	a[i] = CAR(args);
	if((t_ai = TYPEOF(a[i])) == LANGSXP || t_ai == SYMSXP) /* << maybe add more .. */
	    error(_("invalid type of argument[%d]: '%s'"),
		  i+1, CHAR(type2str(t_ai)));
	lens[i] = length(a[i]);
	if(lens[i] == 0) return allocVector(STRSXP, 0);
    }

#define CHECK_maxlen							\
    maxlen = nfmt;							\
    for(i = 0; i < nargs; i++)						\
	if(maxlen < lens[i]) maxlen = lens[i];				\
    if(maxlen % nfmt)							\
	error(_("arguments cannot be recycled to the same length"));	\
    for(i = 0; i < nargs; i++)						\
	if(maxlen % lens[i])						\
	    error(_("arguments cannot be recycled to the same length"))

    CHECK_maxlen;

    outputString = R_AllocStringBuffer(0, &outbuff);

    /* We do the format analysis a row at a time */
    for(ns = 0; ns < maxlen; ns++) {
	outputString[0] = '\0';
	use_UTF8 = getCharCE(STRING_ELT(format, ns % nfmt)) == CE_UTF8;
	if (!use_UTF8) {
	    for(i = 0; i < nargs; i++) {
		if (!isString(a[i])) continue;
		if (getCharCE(STRING_ELT(a[i], ns % lens[i])) == CE_UTF8) {
		    use_UTF8 = TRUE; break;
		}
	    }
	}

	formatString = TRANSLATE_CHAR(format, ns % nfmt);
	n = strlen(formatString);
	if (n > MAXLINE)
	    error(_("'fmt' length exceeds maximal format length %d"), MAXLINE);
	/* process the format string */
	for (cur = 0, cnt = 0; cur < n; cur += chunk) {
	    const char *curFormat = formatString + cur, *ss;
	    char *starc;
	    ss = NULL;
	    if (formatString[cur] == '%') { /* handle special format command */

		if (cur < n - 1 && formatString[cur + 1] == '%') {
		    /* take care of %% in the format */
		    chunk = 2;
		    strcpy(bit, "%");
		}
		else {
		    /* recognise selected types from Table B-1 of K&R */
		    /* NB: we deal with "%%" in branch above. */
		    /* This is MBCS-OK, as we are in a format spec */
		    chunk = strcspn(curFormat + 1, "diosfeEgGxXaA") + 2;
		    if (cur + chunk > n)
			error(_("unrecognised format specification '%s'"), curFormat);

		    strncpy(fmt, curFormat, chunk);
		    fmt[chunk] = '\0';

		    nthis = -1;
		    /* now look for %n$ or %nn$ form */
		    if (strlen(fmt) > 3 && fmt[1] >= '1' && fmt[1] <= '9') {
			v = fmt[1] - '0';
			if(fmt[2] == '$') {
			    if(v > nargs)
				error(_("reference to non-existent argument %d"), v);
			    nthis = v-1;
			    memmove(fmt+1, fmt+3, strlen(fmt)-2);
			} else if(fmt[2] >= '0' && fmt[2] <= '9' && fmt[3] == '$') {
			    v = 10*v + fmt[2] - '0';
			    if(v > nargs)
				error(_("reference to non-existent argument %d"), v);
			    nthis = v-1;
			    memmove(fmt+1, fmt+4, strlen(fmt)-3);
			}
		    }

		    starc = Rf_strchr(fmt, '*');
		    if (starc) { /* handle  *  format if present */
			nstar = -1;
			if (strlen(starc) > 3 && starc[1] >= '1' && starc[1] <= '9') {
			    v = starc[1] - '0';
			    if(starc[2] == '$') {
				if(v > nargs)
				    error(_("reference to non-existent argument %d"), v);
				nstar = v-1;
				memmove(starc+1, starc+3, strlen(starc)-2);
			    } else if(starc[2] >= '0' && starc[2] <= '9'
				      && starc[3] == '$') {
				v = 10*v + starc[2] - '0';
				if(v > nargs)
				    error(_("reference to non-existent argument %d"), v);
				nstar = v-1;
				memmove(starc+1, starc+4, strlen(starc)-3);
			    }
			}

			if(nstar < 0) {
			    if (cnt >= nargs) error(_("too few arguments"));
			    nstar = cnt++;
			}

			if (Rf_strchr(starc+1, '*'))
			    error(_("at most one asterisk '*' is supported in each conversion specification"));

			_this = a[nstar];
			if(ns == 0 && TYPEOF(_this) == REALSXP) {
			    _this = coerceVector(_this, INTSXP);
			    PROTECT(a[nstar] = _this);
			    nprotect++;
			}
			if(TYPEOF(_this) != INTSXP || LENGTH(_this)<1 ||
			   INTEGER(_this)[ns % LENGTH(_this)] == NA_INTEGER)
			    error(_("argument for '*' conversion specification must be a number"));
			star_arg = INTEGER(_this)[ns % LENGTH(_this)];
			has_star = TRUE;
		    }
		    else
			has_star = FALSE;

		    if (fmt[strlen(fmt) - 1] == '%') {
			/* handle % with formatting options */
			if (has_star)
			    snprintf(bit, MAXLINE+1, fmt, star_arg);
			else
			    strcpy(bit, fmt);
			/* was sprintf(..)  for which some compiler warn */
		    } else {
			Rboolean did_this = FALSE;
			if(nthis < 0) {
			    if (cnt >= nargs) error(_("too few arguments"));
			    nthis = cnt++;
			}
			_this = a[nthis];
			if (has_star) {
			    size_t nf; char *p, *q = fmt2;
			    for (p = fmt; *p; p++)
				if (*p == '*') q += sprintf(q, "%d", star_arg);
				else *q++ = *p;
			    *q = '\0';
			    nf = strlen(fmt2);
			    if (nf > MAXLINE)
				error(_("'fmt' length exceeds maximal format length %d"),
				      MAXLINE);
			    fmtp = fmt2;
			} else fmtp = fmt;

#define CHECK_this_length						\
			PROTECT(_this);					\
			thislen = length(_this);			\
			if(thislen == 0)				\
			    error(_("coercion has changed vector length to 0"))

			/* Now let us see if some minimal coercion
			   would be sensible, but only do so once, for ns = 0: */
			if(ns == 0) {
			    SEXP tmp; Rboolean do_check;
			    switch(*findspec(fmtp)) {
			    case 'd':
			    case 'i':
			    case 'o':
			    case 'x':
			    case 'X':
				if(TYPEOF(_this) == REALSXP) {
				    double r = REAL(_this)[0];
				    if((double)((int) r) == r)
					_this = coerceVector(_this, INTSXP);
				    PROTECT(a[nthis] = _this);
				    nprotect++;
				}
				break;
			    case 'a':
			    case 'A':
			    case 'e':
			    case 'f':
			    case 'g':
			    case 'E':
			    case 'G':
				if(TYPEOF(_this) != REALSXP &&
				   /* no automatic as.double(<string>) : */
				   TYPEOF(_this) != STRSXP) {
				    PROTECT(tmp = lang2(install("as.double"), _this));
#define COERCE_THIS_TO_A						\
				    _this = eval(tmp, env);		\
				    UNPROTECT(1);			\
				    PROTECT(a[nthis] = _this);		\
				    nprotect++;				\
				    did_this = TRUE;			\
				    CHECK_this_length;			\
				    do_check = (lens[nthis] == maxlen);	\
				    lens[nthis] = thislen; /* may have changed! */ \
				    if(do_check && thislen < maxlen) {	\
					CHECK_maxlen;			\
				    }

				    COERCE_THIS_TO_A
				}
				break;
			    case 's':
				if(TYPEOF(_this) != STRSXP) {
				    /* as.character method might call sprintf() */
				    size_t nc = strlen(outputString);
				    char *z = Calloc(nc+1, char);
				    strcpy(z, outputString);
				    PROTECT(tmp = lang2(install("as.character"), _this));

				    COERCE_THIS_TO_A
				    strcpy(outputString, z);
				    Free(z);
				}
				break;
			    default:
				break;
			    }
			} /* ns == 0 (first-time only) */

			if(!did_this)
			    CHECK_this_length;

			switch(TYPEOF(_this)) {
			case LGLSXP:
			    {
				int x = LOGICAL(_this)[ns % thislen];
				if (checkfmt(fmtp, "di"))
				    error(_("invalid format '%s'; %s"), fmtp,
					  _("use format %d or %i for logical objects"));
				if (x == NA_LOGICAL) {
				    fmtp[strlen(fmtp)-1] = 's';
				    _my_sprintf("NA")
				} else {
				    _my_sprintf(x)
				}
				break;
			    }
			case INTSXP:
			    {
				int x = INTEGER(_this)[ns % thislen];
				if (checkfmt(fmtp, "dioxX"))
				    error(_("invalid format '%s'; %s"), fmtp,
					  _("use format %d, %i, %o, %x or %X for integer objects"));
				if (x == NA_INTEGER) {
				    fmtp[strlen(fmtp)-1] = 's';
				    _my_sprintf("NA")
				} else {
				    _my_sprintf(x)
				}
				break;
			    }
Example #16
0
/* returns string from a CHARSXP making sure that the result is in UTF-8 */
const char *rj_char_utf8(SEXP s) {
	if (Rf_getCharCE(s) == CE_UTF8) return CHAR(s);
	return Rf_reEnc(CHAR(s), getCharCE(s), CE_UTF8, 0); /* subst. invalid chars: 1=hex, 2=., 3=?, other=skip */
}
Example #17
0
attribute_hidden
int Rstrlen(SEXP s, int quote)
{
    return Rstrwid(CHAR(s), LENGTH(s), getCharCE(s), quote);
}
Example #18
0
SEXP installTrChar(SEXP x)
{
    void * obj;
    const char *inbuf, *ans = CHAR(x);
    char *outbuf;
    size_t inb, outb, res;
    cetype_t ienc = getCharCE(x);
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    if(TYPEOF(x) != CHARSXP)
	error(_("'%s' must be called on a CHARSXP"), "installTrChar");
    if(x == NA_STRING || !(ENC_KNOWN(x))) return install(ans);
    if(IS_BYTES(x))
	error(_("translating strings with \"bytes\" encoding is not allowed"));
    if(utf8locale && IS_UTF8(x)) return install(ans);
    if(latin1locale && IS_LATIN1(x)) return install(ans);
    if(IS_ASCII(x)) return install(ans);

    if(IS_LATIN1(x)) {
	if(!latin1_obj) {
	    obj = Riconv_open("", "latin1");
	    /* should never happen */
	    if(obj == (void *)(-1))
#ifdef Win32
		error(_("unsupported conversion from '%s' in codepage %d"),
		      "latin1", localeCP);
#else
	        error(_("unsupported conversion from '%s' to '%s'"),
		      "latin1", "");
#endif
	    latin1_obj = obj;
	}
	obj = latin1_obj;
    } else {
	if(!utf8_obj) {
	    obj = Riconv_open("", "UTF-8");
	    /* should never happen */
	    if(obj == (void *)(-1)) 
#ifdef Win32
		error(_("unsupported conversion from '%s' in codepage %d"),
		      "latin1", localeCP);
#else
	        error(_("unsupported conversion from '%s' to '%s'"),
		      "latin1", "");
#endif
	    utf8_obj = obj;
	}
	obj = utf8_obj;
    }

    R_AllocStringBuffer(0, &cbuff);
top_of_loop:
    inbuf = ans; inb = strlen(inbuf);
    outbuf = cbuff.data; outb = cbuff.bufsize - 1;
    /* First initialize output */
    Riconv (obj, NULL, NULL, &outbuf, &outb);
next_char:
    /* Then convert input  */
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    if(res == -1 && errno == E2BIG) {
	R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
	goto top_of_loop;
    } else if(res == -1 && (errno == EILSEQ || errno == EINVAL)) {
	if(outb < 13) {
	    R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
	    goto top_of_loop;
	}
	if (ienc == CE_UTF8) {
	    /* if starting in UTF-8, use \uxxxx */
	    /* This must be the first byte */
	    size_t clen;
	    wchar_t wc;
	    clen = utf8toucs(&wc, inbuf);
	    if(clen > 0 && inb >= clen) {
		inbuf += clen; inb -= clen;
# ifndef Win32
		if((unsigned int) wc < 65536) {
# endif
		    snprintf(outbuf, 9, "<U+%04X>", (unsigned int) wc);
		    outbuf += 8; outb -= 8;
# ifndef Win32
		} else {
		    snprintf(outbuf, 13, "<U+%08X>", (unsigned int) wc);
		    outbuf += 12; outb -= 12;
		}
# endif
	    } else {
		snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
		outbuf += 4; outb -= 4;
		inbuf++; inb--;
	    }
	} else {
	    snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
	    outbuf += 4; outb -= 4;
	    inbuf++; inb--;
	}
	goto next_char;
    }
    *outbuf = '\0';
    SEXP Sans = install(cbuff.data);
    R_FreeStringBuffer(&cbuff);
    return Sans;
}